Merge remote-tracking branch 'origin/feat/317/switch-to-rapidjson'

vrde 2016-05-20 10:59:56 +02:00
commit 95fe5a77aa
No known key found for this signature in database
GPG Key ID: 6581C7C39B3D397D
3 changed files with 84 additions and 13 deletions

View File

@@ -1,5 +1,4 @@
import copy
import json
import time
import contextlib
import threading
@@ -7,6 +6,8 @@ import queue
import multiprocessing as mp
from datetime import datetime
import rapidjson
import cryptoconditions as cc
from cryptoconditions.exceptions import ParsingError
@@ -109,8 +110,7 @@ def serialize(data):
str: JSON formatted string
"""
return json.dumps(data, skipkeys=False, ensure_ascii=False,
separators=(',', ':'), sort_keys=True)
return rapidjson.dumps(data, skipkeys=False, ensure_ascii=False, sort_keys=True)
def deserialize(data):
@@ -123,7 +123,7 @@ def deserialize(data):
dict: dict resulting from the serialization of a JSON formatted string.
"""
return json.loads(data, encoding="utf-8")
return rapidjson.loads(data)
def timestamp():
@@ -275,7 +275,7 @@ def create_tx(current_owners, new_owners, inputs, operation, payload=None):
conditions.append({
'new_owners': new_owners,
'condition': {
'details': json.loads(condition.serialize_json()),
'details': rapidjson.loads(condition.serialize_json()),
'uri': condition.condition_uri
},
'cid': fulfillment['fid']
@@ -519,7 +519,7 @@ def get_input_condition(bigchain, fulfillment):
return {
'condition': {
'details': json.loads(condition.serialize_json()),
'details': rapidjson.loads(condition.serialize_json()),
'uri': condition.condition_uri
}
}

View File

@@ -24,19 +24,20 @@ deserialize(serialize(data)) == data
True
```
After looking at this further, we decided that the Python `json` module is still the best bet because it complies with the RFC. We can specify the encoding and the separators used, and enforce ordering by the keys, to make sure that we obtain maximum interoperability.
Since BigchainDB performs a lot of serialization, we decided to use [python-rapidjson](https://github.com/kenrobbins/python-rapidjson),
a Python wrapper for [rapidjson](https://github.com/miloyip/rapidjson), a fast and fully RFC-compliant JSON parser.
```python
import json
import rapidjson
json.dumps(data, skipkeys=False, ensure_ascii=False,
encoding="utf-8", separators=(',', ':'),
sort_keys=True)
rapidjson.dumps(data, skipkeys=False,
ensure_ascii=False,
sort_keys=True)
```
- `skipkeys`: With `skipkeys` set to `False`, the serialization will fail if any of the provided keys is not a string. This way we enforce all keys to be strings.
- `ensure_ascii`: The RFC recommends `utf-8` for maximum interoperability. By setting `ensure_ascii` to `False` we allow unicode characters and force the encoding to `utf-8`.
- `separators`: We need to define a standard separator to use in the serialization. If we did not do this, different implementations could use different separators for serialization, resulting in a transaction that is still valid but has a different hash; e.g. an extra whitespace introduced in the serialization would still create a valid JSON object, but the hash would be different.
- `ensure_ascii`: The RFC recommends `utf-8` for maximum interoperability. By setting `ensure_ascii` to `False` we allow unicode characters, and python-rapidjson forces the encoding to `utf-8`.
- `sort_keys`: Sorted output by keys (see the short sketch below for these options in action).
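
A minimal sketch of these options in action (assuming python-rapidjson is installed; the sample `data` value is invented purely for illustration):

```python
import rapidjson

data = {'b': 1, 'a': 'fünky ünicode'}

# keys come out sorted and the unicode characters are kept as-is (not escaped)
serialized = rapidjson.dumps(data, skipkeys=False, ensure_ascii=False, sort_keys=True)
print(serialized)

# the serialization round-trips without loss
assert rapidjson.loads(serialized) == data
```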
Every time we need to perform some operation on the data, like calculating the hash or signing/verifying the transaction, we need to serialize the data with the criteria above and then use the `bytes` representation of the serialized data (if we treat the data as bytes we eliminate possible encoding errors, e.g. with unicode characters). For example:
```python
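# Illustrative sketch only: the names and the use of sha256 below are assumptions
# made for this example, not the project's actual hashing code.
import hashlib

import rapidjson

data = {'id': 1, 'payload': 'münich'}

# serialize with the criteria above, then operate on the utf-8 bytes
serialized = rapidjson.dumps(data, skipkeys=False, ensure_ascii=False, sort_keys=True)
digest = hashlib.sha256(serialized.encode('utf-8')).hexdigest()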

View File

@@ -1,3 +1,7 @@
import json
import time
import rapidjson
from line_profiler import LineProfiler
import bigchaindb
@@ -21,5 +25,71 @@ def speedtest_validate_transaction():
profiler.print_stats()
def speedtest_serialize_block_json():
    # create a block
    b = bigchaindb.Bigchain()
    tx = b.create_transaction(b.me, b.me, None, 'CREATE')
    tx_signed = b.sign_transaction(tx, b.me_private)
    block = b.create_block([tx_signed] * 1000)

    time_start = time.time()
    for _ in range(1000):
        _ = json.dumps(block, skipkeys=False, ensure_ascii=False, sort_keys=True)
    time_elapsed = time.time() - time_start

    print('speedtest_serialize_block_json: {} s'.format(time_elapsed))


def speedtest_serialize_block_rapidjson():
    # create a block
    b = bigchaindb.Bigchain()
    tx = b.create_transaction(b.me, b.me, None, 'CREATE')
    tx_signed = b.sign_transaction(tx, b.me_private)
    block = b.create_block([tx_signed] * 1000)

    time_start = time.time()
    for _ in range(1000):
        _ = rapidjson.dumps(block, skipkeys=False, ensure_ascii=False, sort_keys=True)
    time_elapsed = time.time() - time_start

    print('speedtest_serialize_block_rapidjson: {} s'.format(time_elapsed))


def speedtest_deserialize_block_json():
    # create a block
    b = bigchaindb.Bigchain()
    tx = b.create_transaction(b.me, b.me, None, 'CREATE')
    tx_signed = b.sign_transaction(tx, b.me_private)
    block = b.create_block([tx_signed] * 1000)
    block_serialized = json.dumps(block, skipkeys=False, ensure_ascii=False, sort_keys=True)

    time_start = time.time()
    for _ in range(1000):
        _ = json.loads(block_serialized)
    time_elapsed = time.time() - time_start

    print('speedtest_deserialize_block_json: {} s'.format(time_elapsed))


def speedtest_deserialize_block_rapidjson():
    # create a block
    b = bigchaindb.Bigchain()
    tx = b.create_transaction(b.me, b.me, None, 'CREATE')
    tx_signed = b.sign_transaction(tx, b.me_private)
    block = b.create_block([tx_signed] * 1000)
    block_serialized = rapidjson.dumps(block, skipkeys=False, ensure_ascii=False, sort_keys=True)

    time_start = time.time()
    for _ in range(1000):
        _ = rapidjson.loads(block_serialized)
    time_elapsed = time.time() - time_start

    print('speedtest_deserialize_block_rapidjson: {} s'.format(time_elapsed))


if __name__ == '__main__':
    speedtest_validate_transaction()
    speedtest_serialize_block_json()
    speedtest_serialize_block_rapidjson()
    speedtest_deserialize_block_json()
    speedtest_deserialize_block_rapidjson()
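
As a lighter-weight companion to the manual timing loops above, a similar json vs. rapidjson comparison can be sketched with `timeit`. This is only a rough illustration; the sample document and iteration count below are arbitrary stand-ins, not the block used by the speedtests:

```python
import json
import timeit

import rapidjson

# arbitrary stand-in document; the real speedtests above serialize a full block
doc = {'key{}'.format(i): 'value {}'.format(i) for i in range(1000)}

for name, dumps in (('json', json.dumps), ('rapidjson', rapidjson.dumps)):
    elapsed = timeit.timeit(lambda: dumps(doc, sort_keys=True), number=1000)
    print('{}: {:.3f} s'.format(name, elapsed))
```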