From 6f2930eafbef756cfa3c7d4b453e57bce8b1b6a7 Mon Sep 17 00:00:00 2001
From: Vanshdeep Singh <kansi13@gmail.com>
Date: Wed, 29 Aug 2018 13:56:18 +0200
Subject: [PATCH] Problem: Transaction validation is not optimal

Solution: Memoize operations that always produce the same result
---
 bigchaindb/common/memoize.py     | 42 ++++++++++++++++++
 bigchaindb/common/transaction.py | 76 ++++++++------------------------
 bigchaindb/lib.py                |  6 +--
 3 files changed, 64 insertions(+), 60 deletions(-)
 create mode 100644 bigchaindb/common/memoize.py

diff --git a/bigchaindb/common/memoize.py b/bigchaindb/common/memoize.py
new file mode 100644
index 00000000..f52c1dc3
--- /dev/null
+++ b/bigchaindb/common/memoize.py
@@ -0,0 +1,42 @@
+import functools
+import codecs
+from functools import lru_cache
+
+
+class HDict(dict):  # dict made hashable (via its hex 'id') for lru_cache keys
+    def __hash__(self):
+        return int.from_bytes(codecs.decode(self['id'], 'hex'), byteorder='big')
+
+
+@functools.lru_cache(maxsize=16384)
+def from_dict(func, *args, **kwargs):  # cached call-through; func is in the key
+    return func(*args, **kwargs)
+
+
+def memoize_from_dict(func):
+    """Memoize `func(cls, tx_dict, ...)` through the `from_dict` LRU cache."""
+    @functools.wraps(func)
+    def memoized_func(*args, **kwargs):
+        if not args[1].get('id'):
+            return func(*args, **kwargs)  # no id to hash on: skip the cache
+        # HDict makes the dict hashable (by 'id'); *args[2:] instead of
+        # args[2] so an omitted optional positional no longer IndexErrors.
+        return from_dict(func, args[0], HDict(args[1]), *args[2:], **kwargs)
+    return memoized_func
+
+
+@functools.lru_cache(maxsize=16384)
+def to_dict(func, *args, **kwargs):  # cached call-through; func is in the key
+    return func(*args, **kwargs)
+
+
+def memoize_to_dict(func):
+    """Memoize `func(obj, ...)` through the `to_dict` LRU cache."""
+    @functools.wraps(func)
+    def memoized_func(*args, **kwargs):
+        if not args[0].id:
+            return func(*args, **kwargs)  # falsy id: bypass the cache
+        # NOTE(review): cache hits return one shared object to all
+        # callers, so the result should be treated as read-only.
+        return to_dict(func, *args, **kwargs)
+    return memoized_func
diff --git a/bigchaindb/common/transaction.py b/bigchaindb/common/transaction.py
index 53ba0abd..cccc9078 100644
--- a/bigchaindb/common/transaction.py
+++ b/bigchaindb/common/transaction.py
@@ -12,9 +12,9 @@ Attributes:
 """
 from collections import namedtuple
 from copy import deepcopy
-from functools import reduce
+from functools import reduce, lru_cache
 import functools
-import ujson
+import rapidjson
 
 import base58
 from cryptoconditions import Fulfillment, ThresholdSha256, Ed25519Sha256
@@ -29,6 +29,7 @@ from bigchaindb.common.exceptions import (KeypairMismatchException,
                                           AmountError, AssetIdMismatch,
                                           ThresholdTooDeep)
 from bigchaindb.common.utils import serialize
+from .memoize import memoize_from_dict, memoize_to_dict
 
 
 UnspentOutput = namedtuple(
@@ -44,55 +45,6 @@ UnspentOutput = namedtuple(
 )
 
 
-def memoize(func):
-    cache = func.cache = {}
-
-    @functools.wraps(func)
-    def memoized_func(*args, **kwargs):
-        key = args[1]['id']
-        if key not in cache:
-            cache[key] = func(*args, **kwargs)
-
-        return cache[key]
-
-    return memoized_func
-
-
-def memoize_class(func):
-    cache = func.cache = {}
-
-    @functools.wraps(func)
-    def memoized_func(*args, **kwargs):
-        key = args[0]._id
-        if key is None:
-            result = func(*args, **kwargs)
-            cache[result['id']] = result
-            return result
-        elif key not in cache:
-            cache[key] = func(*args, **kwargs)
-
-        return cache[key]
-
-    return memoized_func
-
-
-def memoize_input_valid(func):
-    cache = func.cache = {}
-
-    @functools.wraps(func)
-    def memoized_func(*args, **kwargs):
-        inp_fulfillment = args[1].fulfillment
-        op = args[2]
-        msg = args[3]
-        key = '{}.{}.{}'.format(inp_fulfillment, op, msg)
-        if key not in cache:
-            cache[key] = func(*args, **kwargs)
-
-        return cache[key]
-
-    return memoized_func
-
-
 class Input(object):
     """A Input is used to spend assets locked by an Output.
 
@@ -133,6 +85,11 @@ class Input(object):
         # TODO: If `other !== Fulfillment` return `False`
         return self.to_dict() == other.to_dict()
 
+    def __hash__(self):
+        """Hash on (fulfillment, fulfills) so an Input can key a memoization cache."""
+        key_parts = (self.fulfillment, self.fulfills)
+        return hash(key_parts)
+
     def to_dict(self):
         """Transforms the object to a Python dictionary.
 
@@ -1042,7 +999,7 @@ class Transaction(object):
             raise ValueError('Inputs and '
                              'output_condition_uris must have the same count')
 
-        tx_dict = self.tx_dict  # self.to_dict()
+        tx_dict = self.tx_dict if self.tx_dict else self.to_dict()
         tx_dict = Transaction._remove_signatures(tx_dict)
         tx_dict['id'] = None
         tx_serialized = Transaction._to_str(tx_dict)
@@ -1055,7 +1012,8 @@ class Transaction(object):
         return all(validate(i, cond)
                    for i, cond in enumerate(output_condition_uris))
 
-    @memoize_input_valid
+    # Replaces memoize_input_valid: lru_cache keys on every argument (incl. self)
+    @lru_cache(maxsize=16384)
     def _input_valid(self, input_, operation, message, output_condition_uri=None):
         """Validates a single Input against a single Output.
 
@@ -1101,7 +1059,11 @@ class Transaction(object):
         ffill_valid = parsed_ffill.validate(message=message.digest())
         return output_valid and ffill_valid
 
-    @memoize_class
+    def __hash__(self):
+        """Hash on `self.id`; `lru_cache` needs this to build its cache key."""
+        return hash(self.id)
+
+    @memoize_to_dict
     def to_dict(self):
         """Transforms the object to a Python dictionary.
 
@@ -1205,8 +1167,8 @@ class Transaction(object):
         """
         # NOTE: Remove reference to avoid side effects
         # tx_body = deepcopy(tx_body)
-        # tx_body = rapidjson.loads(rapidjson.dumps(tx_body))
-        tx_body = ujson.loads(ujson.dumps(tx_body))
+        tx_body = rapidjson.loads(rapidjson.dumps(tx_body))
+
         try:
             proposed_tx_id = tx_body['id']
         except KeyError:
@@ -1223,7 +1185,7 @@ class Transaction(object):
             raise InvalidHash(err_msg.format(proposed_tx_id))
 
     @classmethod
-    @memoize
+    @memoize_from_dict
     def from_dict(cls, tx, skip_schema_validation=True):
         """Transforms a Python dictionary to a Transaction object.
 
diff --git a/bigchaindb/lib.py b/bigchaindb/lib.py
index 235aa932..995a5418 100644
--- a/bigchaindb/lib.py
+++ b/bigchaindb/lib.py
@@ -122,10 +122,10 @@ class BigchainDB(object):
         txns = []
         assets = []
         txn_metadatas = []
-        for transaction_obj in transactions:
+        for t in transactions:
             # self.update_utxoset(transaction)
-            transaction = transaction_obj.tx_dict
-            if transaction['operation'] == transaction_obj.CREATE:
+            transaction = t.tx_dict if t.tx_dict else t.to_dict()
+            if transaction['operation'] == t.CREATE:
                 asset = transaction.pop('asset')
                 asset['id'] = transaction['id']
                 assets.append(asset)