diff --git a/bigchaindb/backend/schema.py b/bigchaindb/backend/schema.py index 8192f6cb..80bbfa59 100644 --- a/bigchaindb/backend/schema.py +++ b/bigchaindb/backend/schema.py @@ -16,10 +16,17 @@ import logging import bigchaindb from bigchaindb.backend.connection import connect +from bigchaindb.common.exceptions import ValidationError +from bigchaindb.common.utils import validate_all_values_for_key logger = logging.getLogger(__name__) TABLES = ('bigchain', 'backlog', 'votes', 'assets', 'metadata') +VALID_LANGUAGES = ('danish', 'dutch', 'english', 'finnish', 'french', 'german', + 'hungarian', 'italian', 'norwegian', 'portuguese', 'romanian', + 'russian', 'spanish', 'swedish', 'turkish', 'none', + 'da', 'nl', 'en', 'fi', 'fr', 'de', 'hu', 'it', 'nb', 'pt', + 'ro', 'ru', 'es', 'sv', 'tr') @singledispatch @@ -99,3 +106,44 @@ def init_database(connection=None, dbname=None): create_database(connection, dbname) create_tables(connection, dbname) create_indexes(connection, dbname) + + +def validate_language_key(obj, key): + """Validate all nested "language" key in `obj`. + + Args: + obj (dict): dictionary whose "language" key is to be validated. + + Returns: + None: validation successful + + Raises: + ValidationError: will raise exception in case language is not valid. + """ + backend = bigchaindb.config['database']['backend'] + + if backend == 'mongodb': + data = obj.get(key, {}) + if isinstance(data, dict): + validate_all_values_for_key(data, 'language', validate_language) + + +def validate_language(value): + """Check if `value` is a valid language. + https://docs.mongodb.com/manual/reference/text-search-languages/ + + Args: + value (str): language to validated + + Returns: + None: validation successful + + Raises: + ValidationError: will raise exception in case language is not valid. + """ + if value not in VALID_LANGUAGES: + error_str = ('MongoDB does not support text search for the ' + 'language "{}". 
If you do not understand this error ' + 'message then please rename key/field "language" to ' + 'something else like "lang".').format(value) + raise ValidationError(error_str) diff --git a/bigchaindb/commands/bigchaindb.py b/bigchaindb/commands/bigchaindb.py index 9705065c..6320d279 100644 --- a/bigchaindb/commands/bigchaindb.py +++ b/bigchaindb/commands/bigchaindb.py @@ -196,7 +196,7 @@ def run_start(args): logger.info('RethinkDB started with PID %s' % proc.pid) try: - if args.initialize_database: + if not args.skip_initialize_database: logger.info('Initializing database') _run_init() except DatabaseAlreadyExists: @@ -302,10 +302,11 @@ def create_parser(): action='store_true', help='Run RethinkDB on start') - start_parser.add_argument('--init', - dest='initialize_database', + start_parser.add_argument('--no-init', + dest='skip_initialize_database', + default=False, action='store_true', - help='Force initialize database') + help='Skip database initialization') # parser for configuring the number of shards sharding_parser = subparsers.add_parser('set-shards', diff --git a/bigchaindb/common/utils.py b/bigchaindb/common/utils.py index e472f380..9ad448f5 100644 --- a/bigchaindb/common/utils.py +++ b/bigchaindb/common/utils.py @@ -52,53 +52,73 @@ def deserialize(data): def validate_txn_obj(obj_name, obj, key, validation_fun): - """Validates value associated to `key` in `obj` by applying - `validation_fun`. + """Validate value of `key` in `obj` using `validation_fun`. Args: obj_name (str): name for `obj` being validated. - obj (dict): dictonary object. + obj (dict): dictionary object. key (str): key to be validated in `obj`. validation_fun (function): function used to validate the value of `key`. 
Returns: - None: indicates validation successfull + None: indicates validation successful Raises: - ValidationError: `validation_fun` will raise this error on failure + ValidationError: `validation_fun` will raise exception on failure """ backend = bigchaindb.config['database']['backend'] if backend == 'mongodb': - data = obj.get(key, {}) or {} - validate_all_keys(obj_name, data, validation_fun) + data = obj.get(key, {}) + if isinstance(data, dict): + validate_all_keys(obj_name, data, validation_fun) def validate_all_keys(obj_name, obj, validation_fun): - """Validates all (nested) keys in `obj` by using `validation_fun` + """Validate all (nested) keys in `obj` by using `validation_fun`. Args: obj_name (str): name for `obj` being validated. - obj (dict): dictonary object. + obj (dict): dictionary object. validation_fun (function): function used to validate the value of `key`. Returns: - None: indicates validation successfull + None: indicates validation successful Raises: ValidationError: `validation_fun` will raise this error on failure """ for key, value in obj.items(): validation_fun(obj_name, key) - if type(value) is dict: + if isinstance(value, dict): validate_all_keys(obj_name, value, validation_fun) - return + + +def validate_all_values_for_key(obj, key, validation_fun): + """Validate value for all (nested) occurrence of `key` in `obj` + using `validation_fun`. + + Args: + obj (dict): dictionary object. + key (str): key whose value is to be validated. + validation_fun (function): function used to validate the value + of `key`. + + Raises: + ValidationError: `validation_fun` will raise this error on failure + """ + for vkey, value in obj.items(): + if vkey == key: + validation_fun(value) + elif isinstance(value, dict): + validate_all_values_for_key(value, key, validation_fun) def validate_key(obj_name, key): - """Check if `key` contains ".", "$" or null characters + """Check if `key` contains ".", "$" or null characters. 
+ https://docs.mongodb.com/manual/reference/limits/#Restrictions-on-Field-Names Args: @@ -106,13 +126,13 @@ def validate_key(obj_name, key): key (str): key to validated Returns: - None: indicates validation successfull + None: validation successful Raises: - ValidationError: raise execption incase of regex match. + ValidationError: will raise exception in case of regex match. """ if re.search(r'^[$]|\.|\x00', key): error_str = ('Invalid key name "{}" in {} object. The ' 'key name cannot contain characters ' '".", "$" or null characters').format(key, obj_name) - raise ValidationError(error_str) from ValueError() + raise ValidationError(error_str) diff --git a/bigchaindb/models.py b/bigchaindb/models.py index a1dde131..7660b224 100644 --- a/bigchaindb/models.py +++ b/bigchaindb/models.py @@ -11,6 +11,7 @@ from bigchaindb.common.transaction import Transaction from bigchaindb.common.utils import (gen_timestamp, serialize, validate_txn_obj, validate_key) from bigchaindb.common.schema import validate_transaction_schema +from bigchaindb.backend.schema import validate_language_key class Transaction(Transaction): @@ -87,6 +88,7 @@ class Transaction(Transaction): validate_transaction_schema(tx_body) validate_txn_obj('asset', tx_body['asset'], 'data', validate_key) validate_txn_obj('metadata', tx_body, 'metadata', validate_key) + validate_language_key(tx_body['asset'], 'data') return super().from_dict(tx_body) @classmethod diff --git a/docker-compose.benchmark.yml b/docker-compose.benchmark.yml index 2a2aacc2..c7319040 100644 --- a/docker-compose.benchmark.yml +++ b/docker-compose.benchmark.yml @@ -25,7 +25,7 @@ services: BIGCHAINDB_GRAPHITE_HOST: graphite ports: - "9984" - command: bigchaindb start --init + command: bigchaindb start graphite: image: hopsoft/graphite-statsd diff --git a/docker-compose.rdb.yml b/docker-compose.rdb.yml index e02aa444..15f91675 100644 --- a/docker-compose.rdb.yml +++ b/docker-compose.rdb.yml @@ -45,4 +45,4 @@ services: BIGCHAINDB_SERVER_BIND: 
0.0.0.0:9984 ports: - "9984" - command: bigchaindb start --init + command: bigchaindb start diff --git a/docker-compose.yml b/docker-compose.yml index 8f774106..cd6aa2aa 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -30,4 +30,4 @@ services: BIGCHAINDB_WSSERVER_HOST: 0.0.0.0 ports: - "9984" - command: bigchaindb start --init + command: bigchaindb start diff --git a/docs/server/requirements.txt b/docs/server/requirements.txt index 4321f44b..cd06eab9 100644 --- a/docs/server/requirements.txt +++ b/docs/server/requirements.txt @@ -3,3 +3,5 @@ recommonmark>=0.4.0 sphinx-rtd-theme>=0.1.9 sphinxcontrib-napoleon>=0.4.4 sphinxcontrib-httpdomain>=1.5.0 +pyyaml>=3.12 +bigchaindb diff --git a/docs/server/source/appendices/azure-quickstart-template.md b/docs/server/source/appendices/azure-quickstart-template.md index 13cda281..59f52fd3 100644 --- a/docs/server/source/appendices/azure-quickstart-template.md +++ b/docs/server/source/appendices/azure-quickstart-template.md @@ -33,7 +33,7 @@ API Server bind? (default `localhost:9984`): 0.0.0.0:9984 Finally, run BigchainDB Server by doing: ```text -bigchaindb start --init +bigchaindb start ``` BigchainDB Server should now be running on the Azure virtual machine. diff --git a/docs/server/source/data-models/asset-model.md b/docs/server/source/data-models/asset-model.md index eefa81bb..054a4083 100644 --- a/docs/server/source/data-models/asset-model.md +++ b/docs/server/source/data-models/asset-model.md @@ -2,6 +2,8 @@ To avoid redundant data in transactions, the asset model is different for `CREATE` and `TRANSFER` transactions. +## In CREATE Transactions + In a `CREATE` transaction, the `"asset"` must contain exactly one key-value pair. The key must be `"data"` and the value can be any valid JSON document, or `null`. 
For example: ```json { @@ -12,6 +14,15 @@ In a `CREATE` transaction, the `"asset"` must contain exactly one key-value pair } ``` +When using MongoDB for storage, certain restrictions apply to all (including nested) keys of the `"data"` JSON document: + +* Keys (i.e. key names, not values) must **not** begin with the `$` character. +* Keys must not contain `.` or the null character (Unicode code point 0000). +* The key `"language"` (at any level in the hierarchy) is a special key used for specifying the text search language. Its value must be one of the allowed values; see the valid [Text Search Languages](https://docs.mongodb.com/manual/reference/text-search-languages/) in the MongoDB Docs. In BigchainDB, only the languages supported by _MongoDB community edition_ are allowed. + + +## In TRANSFER Transactions + In a `TRANSFER` transaction, the `"asset"` must contain exactly one key-value pair. They key must be `"id"` and the value must contain a transaction ID (i.e. a SHA3-256 hash: the ID of the `CREATE` transaction which created the asset, which also serves as the asset ID). For example: ```json { diff --git a/docs/server/source/data-models/transaction-model.rst b/docs/server/source/data-models/transaction-model.rst index 38e523bd..b4f2a4f1 100644 --- a/docs/server/source/data-models/transaction-model.rst +++ b/docs/server/source/data-models/transaction-model.rst @@ -46,6 +46,10 @@ Here's some explanation of the contents: - **metadata**: User-provided transaction metadata. It can be any valid JSON document, or ``null``. + **NOTE:** When using MongoDB for storage, certain restrictions apply + to all (including nested) keys of the ``"data"`` JSON document: + 1) keys (i.e. key names, not values) must **not** begin with the ``$`` character, and + 2) keys must not contain ``.`` or the null character (Unicode code point 0000). 
**How the transaction ID is computed.** 1) Build a Python dictionary containing ``version``, ``inputs``, ``outputs``, ``operation``, ``asset``, ``metadata`` and their values, diff --git a/docs/server/source/dev-and-test/setup-bdb-host.md b/docs/server/source/dev-and-test/setup-bdb-host.md index 5feb8c42..cdee3c0b 100644 --- a/docs/server/source/dev-and-test/setup-bdb-host.md +++ b/docs/server/source/dev-and-test/setup-bdb-host.md @@ -27,7 +27,7 @@ waiting for connections on port 27017 To run BigchainDB Server, do: ```text -$ bigchaindb start --init +$ bigchaindb start ``` You can [run all the unit tests](running-all-tests.html) to test your installation. @@ -55,7 +55,7 @@ You can verify that RethinkDB is running by opening the RethinkDB web interface To run BigchainDB Server, do: ```text -$ bigchaindb start --init +$ bigchaindb start ``` You can [run all the unit tests](running-all-tests.html) to test your installation. diff --git a/docs/server/source/quickstart.md b/docs/server/source/quickstart.md index 2375fd5f..63ab8643 100644 --- a/docs/server/source/quickstart.md +++ b/docs/server/source/quickstart.md @@ -54,7 +54,7 @@ $ bigchaindb -y configure mongodb I. Run BigchainDB Server: ```text -$ bigchaindb start --init +$ bigchaindb start ``` J. Verify BigchainDB Server setup by visiting the BigchainDB Root URL in your browser: diff --git a/docs/server/source/server-reference/bigchaindb-cli.md b/docs/server/source/server-reference/bigchaindb-cli.md index fddfd3f5..790cb453 100644 --- a/docs/server/source/server-reference/bigchaindb-cli.md +++ b/docs/server/source/server-reference/bigchaindb-cli.md @@ -61,7 +61,7 @@ If you want to force-drop the database (i.e. skipping the yes/no prompt), then u ## bigchaindb start -Start BigchainDB assuming that the database has already been initialized using `bigchaindb init`. If that is not the case then passing the flag `--init` will initialize the database and start BigchainDB. +Start BigchainDB. 
It always begins by trying a `bigchaindb init` first. See the note in the documentation for `bigchaindb init`. The database initialization step is optional and can be skipped by passing the `--no-init` flag i.e. `bigchaindb start --no-init`. You can also use the `--dev-start-rethinkdb` command line option to automatically start rethinkdb with bigchaindb if rethinkdb is not already running, e.g. `bigchaindb --dev-start-rethinkdb start`. Note that this will also shutdown rethinkdb when the bigchaindb process stops. The option `--dev-allow-temp-keypair` will generate a keypair on the fly if no keypair is found, this is useful when you want to run a temporary instance of BigchainDB in a Docker container, for example. diff --git a/tests/README.md b/tests/README.md index 252fcda4..146d3bc6 100644 --- a/tests/README.md +++ b/tests/README.md @@ -105,17 +105,17 @@ $ docker-compose build First, start `RethinkDB` in the background: ```text -$ docker-compose up -d rdb +$ docker-compose -f docker-compose.rdb.yml up -d rdb ``` then run the tests using: ```text -$ docker-compose run --rm bdb-rdb py.test -v +$ docker-compose -f docker-compose.rdb.yml run --rm bdb-rdb py.test -v ``` to rebuild all the images (usually you only need to rebuild the `bdb` and - `bdb-rdb` images). + `bdb-rdb` images). If that fails, then do `make clean-pyc` and try again. 
## Automated Testing of All Pull Requests diff --git a/tests/commands/conftest.py b/tests/commands/conftest.py index 1aef2d30..46f8a8f6 100644 --- a/tests/commands/conftest.py +++ b/tests/commands/conftest.py @@ -49,7 +49,7 @@ def run_start_args(request): config=param.get('config'), start_rethinkdb=param.get('start_rethinkdb', False), allow_temp_keypair=param.get('allow_temp_keypair', False), - initialize_database=param.get('initialize_database', True), + skip_initialize_database=param.get('skip_initialize_database', False), ) diff --git a/tests/commands/rethinkdb/test_commands.py b/tests/commands/rethinkdb/test_commands.py index 29a84972..c8990582 100644 --- a/tests/commands/rethinkdb/test_commands.py +++ b/tests/commands/rethinkdb/test_commands.py @@ -14,7 +14,7 @@ def test_bigchain_run_start_with_rethinkdb(mock_start_rethinkdb, from bigchaindb import config from bigchaindb.commands.bigchaindb import run_start args = Namespace(start_rethinkdb=True, allow_temp_keypair=False, config=None, yes=True, - initialize_database=True) + skip_initialize_database=False) run_start(args) mock_start_rethinkdb.assert_called_with() diff --git a/tests/commands/test_commands.py b/tests/commands/test_commands.py index 15aa9302..423e4614 100644 --- a/tests/commands/test_commands.py +++ b/tests/commands/test_commands.py @@ -40,7 +40,7 @@ def test_bigchain_run_start(mock_run_configure, from bigchaindb import config from bigchaindb.commands.bigchaindb import run_start args = Namespace(start_rethinkdb=False, allow_temp_keypair=False, config=None, yes=True, - initialize_database=True) + skip_initialize_database=False) run_start(args) mocked_setup_logging.assert_called_once_with(user_log_config=config['log']) @@ -290,7 +290,7 @@ def test_allow_temp_keypair_generates_one_on_the_fly( bigchaindb.config['keypair'] = {'private': None, 'public': None} args = Namespace(allow_temp_keypair=True, start_rethinkdb=False, config=None, yes=True, - initialize_database=True) + 
skip_initialize_database=False) run_start(args) mocked_setup_logging.assert_called_once_with( @@ -317,7 +317,7 @@ def test_allow_temp_keypair_doesnt_override_if_keypair_found(mock_gen_keypair, assert isinstance(original_private_key, str) args = Namespace(allow_temp_keypair=True, start_rethinkdb=False, config=None, yes=True, - initialize_database=True) + skip_initialize_database=False) run_start(args) mocked_setup_logging.assert_called_once_with( diff --git a/tests/conftest.py b/tests/conftest.py index b930e4ec..4b5c7946 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -25,6 +25,15 @@ USER_PRIVATE_KEY = '8eJ8q9ZQpReWyQT5aFCiwtZ5wDZC4eDnCen88p3tQ6ie' USER_PUBLIC_KEY = 'JEAkEJqLbbgDRAtMm8YAjGp759Aq2qTn9eaEHUj2XePE' +def pytest_runtest_setup(item): + if isinstance(item, item.Function): + if item.get_marker('skip_travis_rdb'): + if (os.getenv('TRAVIS_CI') == 'true' and + os.getenv('BIGCHAINDB_DATABASE_BACKEND') == 'rethinkdb'): + pytest.skip( + 'Skip test during Travis CI build when using rethinkdb') + + def pytest_addoption(parser): from bigchaindb.backend.connection import BACKENDS diff --git a/tests/integration/test_federation.py b/tests/integration/test_federation.py index 598412ff..22e2e8da 100644 --- a/tests/integration/test_federation.py +++ b/tests/integration/test_federation.py @@ -97,6 +97,7 @@ def process_vote(steps, result=None): @pytest.mark.bdb @pytest.mark.genesis +@pytest.mark.skip_travis_rdb def test_elect_valid(federation_3): [bx, (s0, s1, s2)] = federation_3 tx = input_single_create(bx[0]) @@ -115,6 +116,7 @@ def test_elect_valid(federation_3): @pytest.mark.bdb +@pytest.mark.skip_travis_rdb @pytest.mark.genesis def test_elect_invalid(federation_3): [bx, (s0, s1, s2)] = federation_3 @@ -135,6 +137,7 @@ def test_elect_invalid(federation_3): @pytest.mark.bdb @pytest.mark.genesis +@pytest.mark.skip_travis_rdb def test_elect_sybill(federation_3): [bx, (s0, s1, s2)] = federation_3 tx = input_single_create(bx[0]) diff --git 
a/tests/integration/test_integration.py b/tests/integration/test_integration.py index c6eb355a..64035c78 100644 --- a/tests/integration/test_integration.py +++ b/tests/integration/test_integration.py @@ -5,6 +5,7 @@ import pytest pytestmark = [pytest.mark.bdb, pytest.mark.usefixtures('processes')] +@pytest.mark.skip_travis_rdb def test_double_create(b, user_pk): from bigchaindb.models import Transaction from bigchaindb.backend.query import count_blocks @@ -12,9 +13,9 @@ def test_double_create(b, user_pk): metadata={'test': 'test'}).sign([b.me_private]) b.write_transaction(tx) - time.sleep(2) + time.sleep(5) b.write_transaction(tx) - time.sleep(2) + time.sleep(5) tx_returned = b.get_transaction(tx.id) # test that the tx can be queried diff --git a/tests/web/test_transactions.py b/tests/web/test_transactions.py index e5034697..ab01357a 100644 --- a/tests/web/test_transactions.py +++ b/tests/web/test_transactions.py @@ -47,6 +47,47 @@ def test_post_create_transaction_endpoint(b, client): assert res.json['outputs'][0]['public_keys'][0] == user_pub +@pytest.mark.parametrize("nested", [False, True]) +@pytest.mark.parametrize("language,expected_status_code", [ + ('danish', 202), ('dutch', 202), ('english', 202), ('finnish', 202), + ('french', 202), ('german', 202), ('hungarian', 202), ('italian', 202), + ('norwegian', 202), ('portuguese', 202), ('romanian', 202), ('none', 202), + ('russian', 202), ('spanish', 202), ('swedish', 202), ('turkish', 202), + ('da', 202), ('nl', 202), ('en', 202), ('fi', 202), ('fr', 202), + ('de', 202), ('hu', 202), ('it', 202), ('nb', 202), ('pt', 202), + ('ro', 202), ('ru', 202), ('es', 202), ('sv', 202), ('tr', 202), + ('any', 400) +]) +@pytest.mark.language +@pytest.mark.bdb +def test_post_create_transaction_with_language(b, client, nested, language, + expected_status_code): + from bigchaindb.models import Transaction + from bigchaindb.backend.mongodb.connection import MongoDBConnection + + if isinstance(b.connection, MongoDBConnection): + 
user_priv, user_pub = crypto.generate_key_pair() + lang_obj = {'language': language} + + if nested: + asset = {'root': lang_obj} + else: + asset = lang_obj + + tx = Transaction.create([user_pub], [([user_pub], 1)], + asset=asset) + tx = tx.sign([user_priv]) + res = client.post(TX_ENDPOINT, data=json.dumps(tx.to_dict())) + assert res.status_code == expected_status_code + if res.status_code == 400: + expected_error_message = ( + 'Invalid transaction (ValidationError): MongoDB does not support ' + 'text search for the language "{}". If you do not understand this ' + 'error message then please rename key/field "language" to something ' + 'else like "lang".').format(language) + assert res.json['message'] == expected_error_message + + @pytest.mark.parametrize("field", ['asset', 'metadata']) @pytest.mark.parametrize("value,err_key,expected_status_code", [ ({'bad.key': 'v'}, 'bad.key', 400),