diff --git a/bigchaindb/common/schema/__init__.py b/bigchaindb/common/schema/__init__.py index a69793ad..cc4bfb9d 100644 --- a/bigchaindb/common/schema/__init__.py +++ b/bigchaindb/common/schema/__init__.py @@ -3,6 +3,8 @@ import os.path import jsonschema import yaml +import rapidjson +import rapidjson_schema from bigchaindb.common.exceptions import SchemaValidationError @@ -25,7 +27,8 @@ def _load_schema(name): with open(path) as handle: schema = yaml.safe_load(handle) drop_schema_descriptions(schema) - return path, schema + fast_schema = rapidjson_schema.loads(rapidjson.dumps(schema)) + return path, (schema, fast_schema) TX_SCHEMA_PATH, TX_SCHEMA_COMMON = _load_schema('transaction') @@ -36,10 +39,27 @@ VOTE_SCHEMA_PATH, VOTE_SCHEMA = _load_schema('vote') def _validate_schema(schema, body): """ Validate data against a schema """ + + # Note + # + # Schema validation is currently the major CPU bottleneck of + # BigchainDB. The `jsonschema` library validates Python data structures + # directly and produces nice error messages, but validation takes 4+ ms + # per transaction, which is pretty slow. The rapidjson library validates + # much faster at 1.5 ms; however, it produces _very_ poor error messages. + # For this reason we use both: rapidjson as an optimistic pathway and + # jsonschema as a fallback in case there is a failure, so we can produce + # a helpful error message. 
+ try: - jsonschema.validate(body, schema) - except jsonschema.ValidationError as exc: - raise SchemaValidationError(str(exc)) from exc + schema[1].validate(rapidjson.dumps(body)) + except ValueError as exc: + try: + jsonschema.validate(body, schema[0]) + except jsonschema.ValidationError as exc2: + raise SchemaValidationError(str(exc2)) from exc2 + raise Exception('jsonschema did not raise an exception, wheras rapidjson raised', + exc) def validate_transaction_schema(tx): diff --git a/setup.py b/setup.py index 55543ea3..4fd485c0 100644 --- a/setup.py +++ b/setup.py @@ -79,6 +79,7 @@ install_requires = [ 'jsonschema~=2.5.1', 'pyyaml~=3.12', 'aiohttp~=2.0', + 'python-rapidjson-schema==0.1.1', ] setup(