Problem: Validation code not optimized (#2490)

Solution: memoize transaction serialization (`to_dict`), deserialization (`from_dict`), and input validation to avoid repeated work during validation
This commit is contained in:
Vanshdeep Singh 2018-09-04 15:30:52 +02:00 committed by vrde
parent c72c7a4626
commit cb22557771
10 changed files with 195 additions and 25 deletions

View File

@ -91,10 +91,8 @@ def get_assets(conn, asset_ids):
@register_query(LocalMongoDBConnection)
def get_spent(conn, transaction_id, output):
    """Return transactions whose inputs spend the given output.

    Args:
        conn: a LocalMongoDBConnection instance.
        transaction_id (str): id of the transaction that created the output.
        output (int): index of the output within that transaction.

    Returns:
        A cursor over matching transaction documents (``_id`` excluded).
    """
    # NOTE: the rendered diff left two identical `query` assignments; only
    # one is needed.
    query = {'inputs.fulfills': {'transaction_id': transaction_id,
                                 'output_index': output}}
    return conn.run(
        conn.collection('transactions')
        .find(query, {'_id': 0}))

View File

@ -0,0 +1,58 @@
import functools
import codecs
from functools import lru_cache
class HDict(dict):
    """A ``dict`` made hashable so it can be used as an ``lru_cache`` key.

    The hash is derived from the raw bytes of the ``'id'`` entry (a hex
    string), so two dicts describing the same transaction id share a
    cache slot.
    """

    def __hash__(self):
        raw_id = codecs.decode(self['id'], 'hex')
        return hash(raw_id)
@lru_cache(maxsize=16384)
def from_dict(func, *args, **kwargs):
    """Memoized trampoline: apply *func* to hashable arguments.

    All arguments (including *func*) form the cache key, so they must be
    hashable — dict payloads are expected to arrive wrapped as ``HDict``.
    """
    result = func(*args, **kwargs)
    return result
def memoize_from_dict(func):
    """Decorator memoizing a ``from_dict``-style classmethod.

    Only payloads carrying a truthy ``'id'`` are cached — the id is what
    makes the dict hashable (via ``HDict``). Unsigned/partial payloads
    fall through to an uncached call.
    """

    @functools.wraps(func)
    def memoized_func(*args, **kwargs):
        # args[0] is presumably the class (the wrapped function is used as
        # a classmethod); args[1] is the transaction payload dict.
        payload = args[1]
        if payload.get('id', None):
            hashable_args = (args[0], HDict(payload)) + tuple(args[2:])
            return from_dict(func, *hashable_args, **kwargs)
        return func(*args, **kwargs)

    return memoized_func
class ToDictWrapper():
    """Hashable wrapper around a transaction, keyed on its ``id``.

    Lets ``lru_cache`` treat two wrappers of equal-id transactions as the
    same cache key.
    """

    def __init__(self, tx):
        self.tx = tx

    def __eq__(self, other):
        # Equality mirrors __hash__: the transaction id is the identity.
        return self.tx.id == other.tx.id

    def __hash__(self):
        return hash(self.tx.id)
@lru_cache(maxsize=16384)
def to_dict(func, tx_wrapped):
    """Memoized serialization: unwrap the transaction and apply *func*.

    The cache key is (func, wrapper); the wrapper hashes on the
    transaction id.
    """
    tx = tx_wrapped.tx
    return func(tx)
def memoize_to_dict(func):
    """Decorator memoizing a ``to_dict``-style method keyed on the tx id.

    Transactions without an id (not yet signed/finalized) are serialized
    without caching, since there is no stable key for them.
    """

    @functools.wraps(func)
    def memoized_func(*args, **kwargs):
        tx = args[0]
        if tx.id:
            return to_dict(func, ToDictWrapper(tx))
        return func(*args, **kwargs)

    return memoized_func

View File

@ -12,7 +12,8 @@ Attributes:
"""
from collections import namedtuple
from copy import deepcopy
from functools import reduce
from functools import reduce, lru_cache
import rapidjson
import base58
from cryptoconditions import Fulfillment, ThresholdSha256, Ed25519Sha256
@ -27,6 +28,7 @@ from bigchaindb.common.exceptions import (KeypairMismatchException,
AmountError, AssetIdMismatch,
ThresholdTooDeep)
from bigchaindb.common.utils import serialize
from .memoize import memoize_from_dict, memoize_to_dict
UnspentOutput = namedtuple(
@ -82,6 +84,11 @@ class Input(object):
# TODO: If `other !== Fulfillment` return `False`
return self.to_dict() == other.to_dict()
# NOTE: This function is used to provide a unique key for a given
# Input to supplement memoization (Inputs appear inside lru_cache keys
# such as Transaction._input_valid's arguments).
def __hash__(self):
    # fulfillment and fulfills together identify this input; both are
    # assumed hashable — TODO(review): confirm for every fulfillment type.
    return hash((self.fulfillment, self.fulfills))
def to_dict(self):
"""Transforms the object to a Python dictionary.
@ -500,7 +507,7 @@ class Transaction(object):
VERSION = '2.0'
def __init__(self, operation, asset, inputs=None, outputs=None,
metadata=None, version=None, hash_id=None):
metadata=None, version=None, hash_id=None, tx_dict=None):
"""The constructor allows to create a customizable Transaction.
Note:
@ -553,6 +560,7 @@ class Transaction(object):
self.outputs = outputs or []
self.metadata = metadata
self._id = hash_id
self.tx_dict = tx_dict
@property
def unspent_outputs(self):
@ -990,7 +998,7 @@ class Transaction(object):
raise ValueError('Inputs and '
'output_condition_uris must have the same count')
tx_dict = self.to_dict()
tx_dict = self.tx_dict if self.tx_dict else self.to_dict()
tx_dict = Transaction._remove_signatures(tx_dict)
tx_dict['id'] = None
tx_serialized = Transaction._to_str(tx_dict)
@ -1003,6 +1011,7 @@ class Transaction(object):
return all(validate(i, cond)
for i, cond in enumerate(output_condition_uris))
@lru_cache(maxsize=16384)
def _input_valid(self, input_, operation, message, output_condition_uri=None):
"""Validates a single Input against a single Output.
@ -1048,6 +1057,11 @@ class Transaction(object):
ffill_valid = parsed_ffill.validate(message=message.digest())
return output_valid and ffill_valid
# This function is required by `lru_cache` to create a key for memoization
# (e.g. `_input_valid` is cached with `self` as part of the key); a signed
# transaction is uniquely identified by its id.
def __hash__(self):
    return hash(self.id)
@memoize_to_dict
def to_dict(self):
"""Transforms the object to a Python dictionary.
@ -1150,7 +1164,9 @@ class Transaction(object):
tx_body (dict): The Transaction to be transformed.
"""
# NOTE: Remove reference to avoid side effects
tx_body = deepcopy(tx_body)
# tx_body = deepcopy(tx_body)
tx_body = rapidjson.loads(rapidjson.dumps(tx_body))
try:
proposed_tx_id = tx_body['id']
except KeyError:
@ -1167,6 +1183,7 @@ class Transaction(object):
raise InvalidHash(err_msg.format(proposed_tx_id))
@classmethod
@memoize_from_dict
def from_dict(cls, tx, skip_schema_validation=True):
"""Transforms a Python dictionary to a Transaction object.
@ -1184,7 +1201,7 @@ class Transaction(object):
inputs = [Input.from_dict(input_) for input_ in tx['inputs']]
outputs = [Output.from_dict(output) for output in tx['outputs']]
return cls(tx['operation'], tx['asset'], inputs, outputs,
tx['metadata'], tx['version'], hash_id=tx['id'])
tx['metadata'], tx['version'], hash_id=tx['id'], tx_dict=tx)
@classmethod
def from_db(cls, bigchain, tx_dict_list):

View File

@ -9,6 +9,7 @@ MongoDB.
import logging
from collections import namedtuple
from uuid import uuid4
import rapidjson
try:
from hashlib import sha3_256
@ -77,10 +78,11 @@ class BigchainDB(object):
raise ValidationError('Mode must be one of the following {}.'
.format(', '.join(self.mode_list)))
tx_dict = transaction.tx_dict if transaction.tx_dict else transaction.to_dict()
payload = {
'method': mode,
'jsonrpc': '2.0',
'params': [encode_transaction(transaction.to_dict())],
'params': [encode_transaction(tx_dict)],
'id': str(uuid4())
}
# TODO: handle connection errors!
@ -122,10 +124,9 @@ class BigchainDB(object):
txns = []
assets = []
txn_metadatas = []
for transaction_obj in transactions:
# self.update_utxoset(transaction)
transaction = transaction_obj.to_dict()
if transaction['operation'] == transaction_obj.CREATE:
for t in transactions:
transaction = t.tx_dict if t.tx_dict else rapidjson.loads(rapidjson.dumps(t.to_dict()))
if transaction['operation'] == t.CREATE:
asset = transaction.pop('asset')
asset['id'] = transaction['id']
assets.append(asset)
@ -224,6 +225,10 @@ class BigchainDB(object):
return backend.query.delete_unspent_outputs(
self.connection, *unspent_outputs)
def is_committed(self, transaction_id):
    """Return ``True`` if a transaction with *transaction_id* exists in
    the backend store (i.e. has been committed), ``False`` otherwise.
    """
    stored_tx = backend.query.get_transaction(self.connection,
                                              transaction_id)
    return bool(stored_tx)
def get_transaction(self, transaction_id):
transaction = backend.query.get_transaction(self.connection, transaction_id)

View File

@ -27,7 +27,7 @@ class Transaction(Transaction):
if self.operation == Transaction.CREATE:
duplicates = any(txn for txn in current_transactions if txn.id == self.id)
if bigchain.get_transaction(self.to_dict()['id']) or duplicates:
if bigchain.is_committed(self.id) or duplicates:
raise DuplicateTransaction('transaction `{}` already exists'
.format(self.id))

View File

@ -35,12 +35,6 @@ class ValidatorElection(Transaction):
INCONCLUSIVE = 'inconclusive'
ELECTION_THRESHOLD = 2 / 3
def __init__(self, operation, asset, inputs, outputs,
metadata=None, version=None, hash_id=None):
# operation `CREATE` is being passed as argument as `VALIDATOR_ELECTION` is an extension
# of `CREATE` and any validation on `CREATE` in the parent class should apply to it
super().__init__(operation, asset, inputs, outputs, metadata, version, hash_id)
@classmethod
def get_validator_change(cls, bigchain, height=None):
"""Return the latest change to the validator set

View File

@ -205,7 +205,7 @@ def test_get_owned_ids(signed_create_tx, user_pk):
conn = connect()
# insert a transaction
conn.db.transactions.insert_one(signed_create_tx.to_dict())
conn.db.transactions.insert_one(deepcopy(signed_create_tx.to_dict()))
txns = list(query.get_owned_ids(conn, user_pk))
@ -224,7 +224,7 @@ def test_get_spending_transactions(user_pk, user_sk):
tx2 = Transaction.transfer([inputs[0]], out, tx1.id).sign([user_sk])
tx3 = Transaction.transfer([inputs[1]], out, tx1.id).sign([user_sk])
tx4 = Transaction.transfer([inputs[2]], out, tx1.id).sign([user_sk])
txns = [tx.to_dict() for tx in [tx1, tx2, tx3, tx4]]
txns = [deepcopy(tx.to_dict()) for tx in [tx1, tx2, tx3, tx4]]
conn.db.transactions.insert_many(txns)
links = [inputs[0].fulfills.to_dict(), inputs[2].fulfills.to_dict()]

View File

@ -0,0 +1,92 @@
# Copyright BigchainDB GmbH and BigchainDB contributors
# SPDX-License-Identifier: (Apache-2.0 AND CC-BY-4.0)
# Code is Apache-2.0 and docs are CC-BY-4.0
import pytest
from copy import deepcopy
from bigchaindb.models import Transaction
from bigchaindb.common.crypto import generate_key_pair
from bigchaindb.common.memoize import to_dict, from_dict
pytestmark = [pytest.mark.tendermint, pytest.mark.bdb]
def test_memoize_to_dict(b):
    """After the first serialization, `to_dict` calls hit the cache."""
    alice = generate_key_pair()
    asset = {'data': {'id': 'test_id'}}

    # Cache starts cold for this test (cleared by the _bdb fixture).
    assert to_dict.cache_info().hits == 0
    assert to_dict.cache_info().misses == 0

    tx = Transaction.create(
        [alice.public_key],
        [([alice.public_key], 1)],
        asset=asset,
    ).sign([alice.private_key])

    tx.to_dict()
    assert to_dict.cache_info().hits == 0
    assert to_dict.cache_info().misses == 1

    tx.to_dict()
    tx.to_dict()
    assert to_dict.cache_info().hits == 2
    assert to_dict.cache_info().misses == 1
def test_memoize_from_dict(b):
    """Repeated `from_dict` on an identical payload is served from cache."""
    alice = generate_key_pair()
    asset = {'data': {'id': 'test_id'}}

    # Cache starts cold for this test (cleared by the _bdb fixture).
    assert from_dict.cache_info().hits == 0
    assert from_dict.cache_info().misses == 0

    tx = Transaction.create(
        [alice.public_key],
        [([alice.public_key], 1)],
        asset=asset,
    ).sign([alice.private_key])
    tx_dict = deepcopy(tx.to_dict())

    Transaction.from_dict(tx_dict)
    assert from_dict.cache_info().hits == 0
    assert from_dict.cache_info().misses == 1

    Transaction.from_dict(tx_dict)
    Transaction.from_dict(tx_dict)
    assert from_dict.cache_info().hits == 2
    assert from_dict.cache_info().misses == 1
def test_memoize_input_valid(b):
    """Re-validating the same inputs reuses `_input_valid`'s cache."""
    alice = generate_key_pair()
    asset = {'data': {'id': 'test_id'}}

    # Cache starts cold for this test (cleared by the _bdb fixture).
    assert Transaction._input_valid.cache_info().hits == 0
    assert Transaction._input_valid.cache_info().misses == 0

    tx = Transaction.create(
        [alice.public_key],
        [([alice.public_key], 1)],
        asset=asset,
    ).sign([alice.private_key])

    tx.inputs_valid()
    assert Transaction._input_valid.cache_info().hits == 0
    assert Transaction._input_valid.cache_info().misses == 1

    tx.inputs_valid()
    tx.inputs_valid()
    assert Transaction._input_valid.cache_info().hits == 2
    assert Transaction._input_valid.cache_info().misses == 1

View File

@ -13,7 +13,7 @@ from cryptoconditions import Ed25519Sha256
from pytest import mark, raises
from sha3 import sha3_256
pytestmark = mark.tendermint
pytestmark = [mark.tendermint, mark.bdb]
def test_input_serialization(ffill_uri, user_pub):
@ -533,7 +533,7 @@ def test_validate_input_with_invalid_parameters(utx):
input_conditions = [out.fulfillment.condition_uri for out in utx.outputs]
tx_dict = utx.to_dict()
tx_serialized = Transaction._to_str(tx_dict)
valid = utx._input_valid(utx.inputs[0], tx_serialized, input_conditions)
valid = utx._input_valid(utx.inputs[0], tx_serialized, input_conditions[0])
assert not valid

View File

@ -144,11 +144,17 @@ def _bdb(_setup_database, _configure_bigchaindb):
from bigchaindb import config
from bigchaindb.backend import connect
from .utils import flush_db
from bigchaindb.common.memoize import to_dict, from_dict
from bigchaindb.models import Transaction
conn = connect()
yield
dbname = config['database']['name']
flush_db(conn, dbname)
to_dict.cache_clear()
from_dict.cache_clear()
Transaction._input_valid.cache_clear()
# We need this function to avoid loading an existing
# conf file located in the home of the user running