From a3b01147e5a23096534b81f9566ef35028489ade Mon Sep 17 00:00:00 2001 From: diminator Date: Mon, 20 Mar 2017 21:35:47 +0100 Subject: [PATCH 01/60] added CORS for localhost --- bigchaindb/web/server.py | 27 ++++++++++++++++++++++++++- setup.py | 1 + 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/bigchaindb/web/server.py b/bigchaindb/web/server.py index bcd44d11..2740d572 100644 --- a/bigchaindb/web/server.py +++ b/bigchaindb/web/server.py @@ -2,11 +2,12 @@ The application is implemented in Flask and runs using Gunicorn. """ - +import os import copy import multiprocessing from flask import Flask +from flask_cors import CORS import gunicorn.app.base from bigchaindb import utils @@ -60,6 +61,30 @@ def create_app(*, debug=False, threads=4): app = Flask(__name__) + hostname = os.environ.get('DOCKER_MACHINE_IP', 'localhost') + + if not hostname: + hostname = 'localhost' + + origins = ('^(https?://)?(www\.)?({}|0|0.0.0.0|' + 'localhost|127.0.0.1)(\.com)?:\d{{1,5}}$').format(hostname), + CORS(app, + origins=origins, + headers=( + 'x-requested-with', + 'content-type', + 'accept', + 'origin', + 'authorization', + 'x-csrftoken', + 'withcredentials', + 'cache-control', + 'cookie', + 'session-id', + ), + supports_credentials=True, + ) + app.debug = debug app.config['bigchain_pool'] = utils.pool(Bigchain, size=threads) diff --git a/setup.py b/setup.py index 7a38bb1f..e74d4ee1 100644 --- a/setup.py +++ b/setup.py @@ -70,6 +70,7 @@ install_requires = [ 'python-rapidjson==0.0.8', 'logstats>=0.2.1', 'flask>=0.10.1', + 'flask-cors==2.1.2', 'flask-restful~=0.3.0', 'requests~=2.9', 'gunicorn~=19.0', From 94b5318e52bfcbad8251e231f65c08c22f696044 Mon Sep 17 00:00:00 2001 From: diminator Date: Wed, 22 Mar 2017 13:07:30 +0100 Subject: [PATCH 02/60] remove localhost --- bigchaindb/web/server.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/bigchaindb/web/server.py b/bigchaindb/web/server.py index 2740d572..84bbff8d 100644 --- a/bigchaindb/web/server.py +++ b/bigchaindb/web/server.py @@ -61,15 +61,7 @@ def create_app(*, debug=False, threads=4): app = Flask(__name__) - hostname = os.environ.get('DOCKER_MACHINE_IP', 'localhost') - - if not hostname: - hostname = 'localhost' - - origins = ('^(https?://)?(www\.)?({}|0|0.0.0.0|' - 'localhost|127.0.0.1)(\.com)?:\d{{1,5}}$').format(hostname), CORS(app, - origins=origins, headers=( 'x-requested-with', 'content-type', @@ -82,8 +74,7 @@ def create_app(*, debug=False, threads=4): 'cookie', 'session-id', ), - supports_credentials=True, - ) + supports_credentials=True) app.debug = debug From 9c4b284db9b5b418a942409c54dd0d187f7a61b5 Mon Sep 17 00:00:00 2001 From: Lavina Date: Wed, 29 Mar 2017 17:10:55 +0530 Subject: [PATCH 03/60] Add a new exception for MongoDB Authentication errors --- bigchaindb/backend/exceptions.py | 4 ++++ bigchaindb/backend/mongodb/connection.py | 5 ++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/bigchaindb/backend/exceptions.py b/bigchaindb/backend/exceptions.py index 017e19e4..e59b317b 100644 --- a/bigchaindb/backend/exceptions.py +++ b/bigchaindb/backend/exceptions.py @@ -9,6 +9,10 @@ class ConnectionError(BackendError): """Exception raised when the connection to the backend fails.""" +class AuthenticationError(ConnectionError): + """Exception raised when MongoDB Authentication fails""" + + class OperationError(BackendError): """Exception raised when a backend operation fails.""" diff --git a/bigchaindb/backend/mongodb/connection.py b/bigchaindb/backend/mongodb/connection.py index 8688e243..863059f8 100644 --- a/bigchaindb/backend/mongodb/connection.py +++ b/bigchaindb/backend/mongodb/connection.py @@ -8,7 +8,8 @@ from bigchaindb.utils import Lazy from bigchaindb.common.exceptions import ConfigurationError from bigchaindb.backend.exceptions import (DuplicateKeyError, OperationError, - ConnectionError) + ConnectionError, + AuthenticationError) from bigchaindb.backend.connection import Connection logger = logging.getLogger(__name__) @@ -83,6 +84,8 @@ class MongoDBConnection(Connection): # `initialize_replica_set` might raise `ConnectionFailure` or `OperationFailure`. except (pymongo.errors.ConnectionFailure, pymongo.errors.OperationFailure) as exc: + if "Authentication fail" in str(exc): + raise AuthenticationError() from exc raise ConnectionError() from exc From 03172c870a836e8908059b555700395cd239cf1c Mon Sep 17 00:00:00 2001 From: diminator Date: Fri, 31 Mar 2017 11:18:44 +0200 Subject: [PATCH 04/60] flake8 import --- bigchaindb/web/server.py | 1 - 1 file changed, 1 deletion(-) diff --git a/bigchaindb/web/server.py b/bigchaindb/web/server.py index 84bbff8d..fc5c6e85 100644 --- a/bigchaindb/web/server.py +++ b/bigchaindb/web/server.py @@ -2,7 +2,6 @@ The application is implemented in Flask and runs using Gunicorn. """ -import os import copy import multiprocessing From 3bb5973f60d232714c99fbc92ab2988f23113f7f Mon Sep 17 00:00:00 2001 From: Scott Sadler Date: Wed, 12 Apr 2017 17:29:17 +0200 Subject: [PATCH 05/60] fix block order bug --- bigchaindb/backend/mongodb/changefeed.py | 74 +++++++++++++----------- bigchaindb/backend/mongodb/query.py | 26 +++------ bigchaindb/backend/query.py | 19 ++---- bigchaindb/backend/rethinkdb/query.py | 15 ----- bigchaindb/core.py | 10 ---- bigchaindb/pipelines/vote.py | 25 +++----- tests/backend/mongodb/test_changefeed.py | 70 +++++++++------------- tests/backend/mongodb/test_queries.py | 41 ++++++++----- tests/backend/test_generics.py | 2 +- tests/pipelines/test_vote.py | 54 +---------------- 10 files changed, 116 insertions(+), 220 deletions(-) diff --git a/bigchaindb/backend/mongodb/changefeed.py b/bigchaindb/backend/mongodb/changefeed.py index 3abcbeda..59d4e721 100644 --- a/bigchaindb/backend/mongodb/changefeed.py +++ b/bigchaindb/backend/mongodb/changefeed.py @@ -20,50 +20,21 @@ class MongoDBChangeFeed(ChangeFeed): We emulate the behaviour of the RethinkDB changefeed by using a tailable cursor that listens for events on the oplog. """ - def run_forever(self): for element in self.prefeed: self.outqueue.put(element) - while True: - try: - # XXX: hack to force reconnection. Why? Because the cursor - # in `run_changefeed` does not run in the context of a - # Connection object, so if the connection is lost we need - # to manually reset the connection to None. - # See #1154 - self.connection.connection = None - self.run_changefeed() - break - except (BackendError, pymongo.errors.ConnectionFailure): - logger.exception('Error connecting to the database, retrying') - time.sleep(1) - - def run_changefeed(self): - dbname = self.connection.dbname table = self.table - namespace = '{}.{}'.format(dbname, table) + dbname = self.connection.dbname + # last timestamp in the oplog. We only care for operations happening # in the future. last_ts = self.connection.run( self.connection.query().local.oplog.rs.find() .sort('$natural', pymongo.DESCENDING).limit(1) .next()['ts']) - # tailable cursor. A tailable cursor will remain open even after the - # last result was returned. ``TAILABLE_AWAIT`` will block for some - # timeout after the last result was returned. If no result is received - # in the meantime it will raise a StopIteration excetiption. - cursor = self.connection.run( - self.connection.query().local.oplog.rs.find( - {'ns': namespace, 'ts': {'$gt': last_ts}}, - cursor_type=pymongo.CursorType.TAILABLE_AWAIT - )) - while cursor.alive: - try: - record = cursor.next() - except StopIteration: - continue + for record in run_changefeed(self.connection, table, last_ts): is_insert = record['op'] == 'i' is_delete = record['op'] == 'd' @@ -94,7 +65,8 @@ class MongoDBChangeFeed(ChangeFeed): @register_changefeed(MongoDBConnection) -def get_changefeed(connection, table, operation, *, prefeed=None): +def get_changefeed(connection, table, operation, *, prefeed=None, + get_resumption_pointer=None): """Return a MongoDB changefeed. Returns: @@ -103,4 +75,38 @@ def get_changefeed(connection, table, operation, *, prefeed=None): """ return MongoDBChangeFeed(table, operation, prefeed=prefeed, - connection=connection) + connection=connection, + get_resumption_pointer=get_resumption_pointer) + + +_FEED_STOP = False +"""If it's True then the changefeed will return when there are no more items. +""" + + +def run_changefeed(conn, table, last_ts): + """Encapsulate operational logic of tailing changefeed from MongoDB + """ + while True: + try: + # XXX: hack to force reconnection, in case the connection + # is lost while waiting on the cursor. See #1154. + conn.connection = 1 + namespace = conn.dbname + '.' + table + cursor = conn.conn.local.oplog.rs.find( + {'ns': namespace, 'ts': {'$gt': last_ts}}, + {'o._id': False}, + cursor_type=pymongo.CursorType.TAILABLE_AWAIT + ) + logging.debug('Tailing oplog at %s/%s', namespace, last_ts) + while cursor.alive: + try: + record = cursor.next() + yield record + last_ts = record['ts'] + except StopIteration: + if _FEED_STOP: + return + except (BackendError, pymongo.errors.ConnectionFailure): + logger.exception('Lost connection while tailing oplog, retrying') + time.sleep(1) diff --git a/bigchaindb/backend/mongodb/query.py b/bigchaindb/backend/mongodb/query.py index 74b9c35a..a0d8ed12 100644 --- a/bigchaindb/backend/mongodb/query.py +++ b/bigchaindb/backend/mongodb/query.py @@ -5,6 +5,7 @@ from time import time from pymongo import ReturnDocument from bigchaindb import backend +from bigchaindb.backend.mongodb.changefeed import run_changefeed from bigchaindb.common.exceptions import CyclicBlockchainError from bigchaindb.common.transaction import Transaction from bigchaindb.backend.exceptions import DuplicateKeyError @@ -283,21 +284,10 @@ def get_last_voted_block(conn, node_pubkey): @register_query(MongoDBConnection) -def get_unvoted_blocks(conn, node_pubkey): - return conn.run( - conn.collection('bigchain') - .aggregate([ - {'$lookup': { - 'from': 'votes', - 'localField': 'id', - 'foreignField': 'vote.voting_for_block', - 'as': 'votes' - }}, - {'$match': { - 'votes.node_pubkey': {'$ne': node_pubkey}, - 'block.transactions.operation': {'$ne': 'GENESIS'} - }}, - {'$project': { - 'votes': False, '_id': False - }} - ])) +def get_new_blocks_feed(conn, start_block_id): + namespace = conn.dbname + '.bigchain' + query = {'o.id': start_block_id, 'op': 'i', 'ns': namespace} + # Neccesary to find in descending order since tests may write same block id several times + last_ts = conn.conn.local.oplog.rs.find(query).sort('$natural', -1).next()['ts'] + feed = run_changefeed(conn, 'bigchain', last_ts) + return (evt['o'] for evt in feed if evt['op'] == 'i') diff --git a/bigchaindb/backend/query.py b/bigchaindb/backend/query.py index 9aa653d7..8050c415 100644 --- a/bigchaindb/backend/query.py +++ b/bigchaindb/backend/query.py @@ -273,20 +273,6 @@ def get_last_voted_block(connection, node_pubkey): raise NotImplementedError -@singledispatch -def get_unvoted_blocks(connection, node_pubkey): - """Return all the blocks that have not been voted by the specified node. - - Args: - node_pubkey (str): base58 encoded public key - - Returns: - :obj:`list` of :obj:`dict`: a list of unvoted blocks - """ - - raise NotImplementedError - - @singledispatch def get_txids_filtered(connection, asset_id, operation=None): """ @@ -298,3 +284,8 @@ def get_txids_filtered(connection, asset_id, operation=None): """ raise NotImplementedError + + +@singledispatch +def get_new_blocks_feed(connection, start_block_id): + raise NotImplementedError diff --git a/bigchaindb/backend/rethinkdb/query.py b/bigchaindb/backend/rethinkdb/query.py index 6011cc8c..deb02244 100644 --- a/bigchaindb/backend/rethinkdb/query.py +++ b/bigchaindb/backend/rethinkdb/query.py @@ -238,18 +238,3 @@ def get_last_voted_block(connection, node_pubkey): return connection.run( r.table('bigchain', read_mode=READ_MODE) .get(last_block_id)) - - -@register_query(RethinkDBConnection) -def get_unvoted_blocks(connection, node_pubkey): - unvoted = connection.run( - r.table('bigchain', read_mode=READ_MODE) - .filter(lambda block: r.table('votes', read_mode=READ_MODE) - .get_all([block['id'], node_pubkey], index='block_and_voter') - .is_empty()) - .order_by(r.asc(r.row['block']['timestamp']))) - - # FIXME: I (@vrde) don't like this solution. Filtering should be done at a - # database level. Solving issue #444 can help untangling the situation - unvoted_blocks = filter(lambda block: not utils.is_genesis_block(block), unvoted) - return unvoted_blocks diff --git a/bigchaindb/core.py b/bigchaindb/core.py index 5d2e9c03..379f9c6f 100644 --- a/bigchaindb/core.py +++ b/bigchaindb/core.py @@ -594,16 +594,6 @@ class Bigchain(object): return Block.from_dict(backend.query.get_last_voted_block(self.connection, self.me)) - def get_unvoted_blocks(self): - """Return all the blocks that have not been voted on by this node. - - Returns: - :obj:`list` of :obj:`dict`: a list of unvoted blocks - """ - - # XXX: should this return instaces of Block? - return backend.query.get_unvoted_blocks(self.connection, self.me) - def block_election(self, block): if type(block) != dict: block = block.to_dict() diff --git a/bigchaindb/pipelines/vote.py b/bigchaindb/pipelines/vote.py index 9664c520..ee5c3ada 100644 --- a/bigchaindb/pipelines/vote.py +++ b/bigchaindb/pipelines/vote.py @@ -10,10 +10,7 @@ from collections import Counter from multipipes import Pipeline, Node -import bigchaindb -from bigchaindb import Bigchain -from bigchaindb import backend -from bigchaindb.backend.changefeed import ChangeFeed +from bigchaindb import Bigchain, backend from bigchaindb.models import Transaction, Block from bigchaindb.common import exceptions @@ -151,20 +148,13 @@ class Vote: return vote -def initial(): - """Return unvoted blocks.""" - b = Bigchain() - rs = b.get_unvoted_blocks() - return rs - - def create_pipeline(): """Create and return the pipeline of operations to be distributed on different processes.""" voter = Vote() - vote_pipeline = Pipeline([ + return Pipeline([ Node(voter.validate_block), Node(voter.ungroup), Node(voter.validate_tx, fraction_of_cores=1), @@ -172,13 +162,14 @@ def create_pipeline(): Node(voter.write_vote) ]) - return vote_pipeline - def get_changefeed(): - connection = backend.connect(**bigchaindb.config['database']) - return backend.get_changefeed(connection, 'bigchain', ChangeFeed.INSERT, - prefeed=initial()) + """Create and return ordered changefeed of blocks starting from + last voted block""" + b = Bigchain() + last_voted = b.get_last_voted_block().id + feed = backend.query.get_new_blocks_feed(b.connection, last_voted) + return Node(feed.__next__) def start(): diff --git a/tests/backend/mongodb/test_changefeed.py b/tests/backend/mongodb/test_changefeed.py index 00dcaca5..cfe50e57 100644 --- a/tests/backend/mongodb/test_changefeed.py +++ b/tests/backend/mongodb/test_changefeed.py @@ -10,34 +10,32 @@ def mock_changefeed_data(): return [ { 'op': 'i', - 'o': {'_id': '', 'msg': 'seems like we have an insert here'} + 'o': {'_id': '', 'msg': 'seems like we have an insert here'}, + 'ts': 1, }, { 'op': 'd', - 'o': {'msg': 'seems like we have a delete here'} + 'o': {'msg': 'seems like we have a delete here'}, + 'ts': 2, }, { 'op': 'u', 'o': {'msg': 'seems like we have an update here'}, - 'o2': {'_id': 'some-id'} + 'o2': {'_id': 'some-id'}, + 'ts': 3, }, ] @pytest.mark.bdb -@mock.patch('pymongo.cursor.Cursor.alive', new_callable=mock.PropertyMock) +@mock.patch('bigchaindb.backend.mongodb.changefeed._FEED_STOP', True) @mock.patch('pymongo.cursor.Cursor.next') -def test_changefeed_insert(mock_cursor_next, mock_cursor_alive, - mock_changefeed_data): +def test_changefeed_insert(mock_cursor_next, mock_changefeed_data): from bigchaindb.backend import get_changefeed, connect from bigchaindb.backend.changefeed import ChangeFeed # setup connection and mocks conn = connect() - # changefeed.run_forever only returns when the cursor is closed - # so we mock `alive` to be False it finishes reading the mocked data - mock_cursor_alive.side_effect = [mock.DEFAULT, mock.DEFAULT, - mock.DEFAULT, mock.DEFAULT, False] # mock the `next` method of the cursor to return the mocked data mock_cursor_next.side_effect = [mock.DEFAULT] + mock_changefeed_data @@ -51,16 +49,13 @@ def test_changefeed_insert(mock_cursor_next, mock_cursor_alive, @pytest.mark.bdb -@mock.patch('pymongo.cursor.Cursor.alive', new_callable=mock.PropertyMock) +@mock.patch('bigchaindb.backend.mongodb.changefeed._FEED_STOP', True) @mock.patch('pymongo.cursor.Cursor.next') -def test_changefeed_delete(mock_cursor_next, mock_cursor_alive, - mock_changefeed_data): +def test_changefeed_delete(mock_cursor_next, mock_changefeed_data): from bigchaindb.backend import get_changefeed, connect from bigchaindb.backend.changefeed import ChangeFeed conn = connect() - mock_cursor_alive.side_effect = [mock.DEFAULT, mock.DEFAULT, - mock.DEFAULT, mock.DEFAULT, False] mock_cursor_next.side_effect = [mock.DEFAULT] + mock_changefeed_data outpipe = Pipe() @@ -73,17 +68,15 @@ def test_changefeed_delete(mock_cursor_next, mock_cursor_alive, @pytest.mark.bdb +@mock.patch('bigchaindb.backend.mongodb.changefeed._FEED_STOP', True) @mock.patch('pymongo.collection.Collection.find_one') -@mock.patch('pymongo.cursor.Cursor.alive', new_callable=mock.PropertyMock) @mock.patch('pymongo.cursor.Cursor.next') -def test_changefeed_update(mock_cursor_next, mock_cursor_alive, - mock_cursor_find_one, mock_changefeed_data): +def test_changefeed_update(mock_cursor_next, mock_cursor_find_one, + mock_changefeed_data): from bigchaindb.backend import get_changefeed, connect from bigchaindb.backend.changefeed import ChangeFeed conn = connect() - mock_cursor_alive.side_effect = [mock.DEFAULT, mock.DEFAULT, - mock.DEFAULT, mock.DEFAULT, False] mock_cursor_next.side_effect = [mock.DEFAULT] + mock_changefeed_data mock_cursor_find_one.return_value = mock_changefeed_data[2]['o'] @@ -101,18 +94,15 @@ def test_changefeed_update(mock_cursor_next, mock_cursor_alive, @pytest.mark.bdb +@mock.patch('bigchaindb.backend.mongodb.changefeed._FEED_STOP', True) @mock.patch('pymongo.collection.Collection.find_one') -@mock.patch('pymongo.cursor.Cursor.alive', new_callable=mock.PropertyMock) @mock.patch('pymongo.cursor.Cursor.next') -def test_changefeed_multiple_operations(mock_cursor_next, mock_cursor_alive, - mock_cursor_find_one, +def test_changefeed_multiple_operations(mock_cursor_next, mock_cursor_find_one, mock_changefeed_data): from bigchaindb.backend import get_changefeed, connect from bigchaindb.backend.changefeed import ChangeFeed conn = connect() - mock_cursor_alive.side_effect = [mock.DEFAULT, mock.DEFAULT, - mock.DEFAULT, mock.DEFAULT, False] mock_cursor_next.side_effect = [mock.DEFAULT] + mock_changefeed_data mock_cursor_find_one.return_value = mock_changefeed_data[2]['o'] @@ -128,16 +118,13 @@ def test_changefeed_multiple_operations(mock_cursor_next, mock_cursor_alive, @pytest.mark.bdb -@mock.patch('pymongo.cursor.Cursor.alive', new_callable=mock.PropertyMock) +@mock.patch('bigchaindb.backend.mongodb.changefeed._FEED_STOP', True) @mock.patch('pymongo.cursor.Cursor.next') -def test_changefeed_prefeed(mock_cursor_next, mock_cursor_alive, - mock_changefeed_data): +def test_changefeed_prefeed(mock_cursor_next, mock_changefeed_data): from bigchaindb.backend import get_changefeed, connect from bigchaindb.backend.changefeed import ChangeFeed conn = connect() - mock_cursor_alive.side_effect = [mock.DEFAULT, mock.DEFAULT, - mock.DEFAULT, mock.DEFAULT, False] mock_cursor_next.side_effect = [mock.DEFAULT] + mock_changefeed_data outpipe = Pipe() @@ -150,19 +137,14 @@ def test_changefeed_prefeed(mock_cursor_next, mock_cursor_alive, @pytest.mark.bdb -@mock.patch('bigchaindb.backend.mongodb.changefeed.MongoDBChangeFeed.run_changefeed') # noqa -def test_connection_failure(mock_run_changefeed): - from bigchaindb.backend import get_changefeed, connect +def test_connection_failure(): from bigchaindb.backend.exceptions import ConnectionError - from bigchaindb.backend.changefeed import ChangeFeed + from bigchaindb.backend.mongodb.changefeed import run_changefeed - conn = connect() - mock_run_changefeed.side_effect = [ConnectionError(), - mock.DEFAULT] - - changefeed = get_changefeed(conn, 'backlog', ChangeFeed.INSERT) - changefeed.run_forever() - - # run_changefeed raises an exception the first time its called and then - # it's called again - assert mock_run_changefeed.call_count == 2 + conn = mock.MagicMock() + find = conn.conn.local.oplog.rs.find + find.side_effect = [ConnectionError(), RuntimeError()] + changefeed = run_changefeed(conn, 'backlog', -1) + with pytest.raises(RuntimeError): + for record in changefeed: + assert False, 'Shouldn\'t get here' diff --git a/tests/backend/mongodb/test_queries.py b/tests/backend/mongodb/test_queries.py index bd7e75f1..f58ba464 100644 --- a/tests/backend/mongodb/test_queries.py +++ b/tests/backend/mongodb/test_queries.py @@ -1,4 +1,5 @@ import pytest +from unittest import mock pytestmark = pytest.mark.bdb @@ -377,21 +378,6 @@ def test_get_last_voted_block(genesis_block, signed_create_tx, b): query.get_last_voted_block(conn, b.me) -def test_get_unvoted_blocks(signed_create_tx): - from bigchaindb.backend import connect, query - from bigchaindb.models import Block - conn = connect() - - # create and insert a block - block = Block(transactions=[signed_create_tx], node_pubkey='aaa') - conn.db.bigchain.insert_one(block.to_dict()) - - unvoted_blocks = list(query.get_unvoted_blocks(conn, 'aaa')) - - assert len(unvoted_blocks) == 1 - assert unvoted_blocks[0] == block.to_dict() - - def test_get_txids_filtered(signed_create_tx, signed_transfer_tx): from bigchaindb.backend import connect, query from bigchaindb.models import Block, Transaction @@ -417,3 +403,28 @@ def test_get_txids_filtered(signed_create_tx, signed_transfer_tx): # Test get by asset and TRANSFER txids = set(query.get_txids_filtered(conn, asset_id, Transaction.TRANSFER)) assert txids == {signed_transfer_tx.id} + + +@mock.patch('bigchaindb.backend.mongodb.changefeed._FEED_STOP', True) +def test_get_new_blocks_feed(b, create_tx): + from bigchaindb.backend import query + from bigchaindb.models import Block + import random + + def create_block(): + ts = str(random.random()) + block = Block(transactions=[create_tx], timestamp=ts) + b.write_block(block) + return block.to_dict() + + create_block() + b1 = create_block() + b2 = create_block() + + feed = query.get_new_blocks_feed(b.connection, b1['id']) + + assert feed.__next__() == b2 + + b3 = create_block() + + assert list(feed) == [b3] diff --git a/tests/backend/test_generics.py b/tests/backend/test_generics.py index 57a644ee..45400c0e 100644 --- a/tests/backend/test_generics.py +++ b/tests/backend/test_generics.py @@ -31,11 +31,11 @@ def test_schema(schema_func_name, args_qty): ('get_block', 1), ('write_vote', 1), ('get_last_voted_block', 1), - ('get_unvoted_blocks', 1), ('get_spent', 2), ('get_votes_by_block_id_and_voter', 2), ('update_transaction', 2), ('get_transaction_from_block', 2), + ('get_new_blocks_feed', 1), )) def test_query(query_func_name, args_qty): from bigchaindb.backend import query diff --git a/tests/pipelines/test_vote.py b/tests/pipelines/test_vote.py index 7df7ca11..12f84592 100644 --- a/tests/pipelines/test_vote.py +++ b/tests/pipelines/test_vote.py @@ -509,54 +509,6 @@ def test_invalid_block_voting(monkeypatch, b, user_pk, genesis_block): vote_doc['signature']) is True -@pytest.mark.genesis -def test_voter_considers_unvoted_blocks_when_single_node(monkeypatch, b): - from bigchaindb.backend import query - from bigchaindb.pipelines import vote - - outpipe = Pipe() - - monkeypatch.setattr('time.time', lambda: 1000000000) - - block_ids = [] - # insert blocks in the database while the voter process is not listening - # (these blocks won't appear in the changefeed) - monkeypatch.setattr('time.time', lambda: 1000000020) - block_1 = dummy_block(b) - block_ids.append(block_1.id) - monkeypatch.setattr('time.time', lambda: 1000000030) - b.write_block(block_1) - block_2 = dummy_block(b) - block_ids.append(block_2.id) - b.write_block(block_2) - - vote_pipeline = vote.create_pipeline() - vote_pipeline.setup(indata=vote.get_changefeed(), outdata=outpipe) - vote_pipeline.start() - - # We expects two votes, so instead of waiting an arbitrary amount - # of time, we can do two blocking calls to `get` - outpipe.get() - outpipe.get() - - # create a new block that will appear in the changefeed - monkeypatch.setattr('time.time', lambda: 1000000040) - block_3 = dummy_block(b) - block_ids.append(block_3.id) - b.write_block(block_3) - - # Same as before with the two `get`s - outpipe.get() - - vote_pipeline.terminate() - - # retrieve vote - votes = [list(query.get_votes_by_block_id(b.connection, _id))[0] - for _id in block_ids] - - assert all(vote['node_pubkey'] == b.me for vote in votes) - - @pytest.mark.genesis def test_voter_chains_blocks_with_the_previous_ones(monkeypatch, b): from bigchaindb.backend import query @@ -564,16 +516,14 @@ def test_voter_chains_blocks_with_the_previous_ones(monkeypatch, b): outpipe = Pipe() - monkeypatch.setattr('time.time', lambda: 1000000000) - block_ids = [] - monkeypatch.setattr('time.time', lambda: 1000000020) block_1 = dummy_block(b) + block_1.timestamp = str(random.random()) block_ids.append(block_1.id) b.write_block(block_1) - monkeypatch.setattr('time.time', lambda: 1000000030) block_2 = dummy_block(b) + block_2.timestamp = str(random.random()) block_ids.append(block_2.id) b.write_block(block_2) From 8f55febefba2127360eb190726acfd76c58e0fb9 Mon Sep 17 00:00:00 2001 From: Scott Sadler Date: Thu, 13 Apr 2017 10:53:13 +0200 Subject: [PATCH 06/60] rethinkdb changefeed update --- bigchaindb/backend/mongodb/changefeed.py | 8 ++--- bigchaindb/backend/rethinkdb/changefeed.py | 26 +++++++++------ bigchaindb/backend/rethinkdb/query.py | 14 ++++++++ bigchaindb/pipelines/vote.py | 4 +-- tests/backend/mongodb/test_queries.py | 2 +- tests/pipelines/test_vote.py | 39 ---------------------- 6 files changed, 35 insertions(+), 58 deletions(-) diff --git a/bigchaindb/backend/mongodb/changefeed.py b/bigchaindb/backend/mongodb/changefeed.py index 59d4e721..59d53b72 100644 --- a/bigchaindb/backend/mongodb/changefeed.py +++ b/bigchaindb/backend/mongodb/changefeed.py @@ -15,7 +15,7 @@ register_changefeed = module_dispatch_registrar(backend.changefeed) class MongoDBChangeFeed(ChangeFeed): - """This class implements a MongoDB changefeed. + """This class implements a MongoDB changefeed as a multipipes Node. We emulate the behaviour of the RethinkDB changefeed by using a tailable cursor that listens for events on the oplog. @@ -65,8 +65,7 @@ class MongoDBChangeFeed(ChangeFeed): @register_changefeed(MongoDBConnection) -def get_changefeed(connection, table, operation, *, prefeed=None, - get_resumption_pointer=None): +def get_changefeed(connection, table, operation, *, prefeed=None): """Return a MongoDB changefeed. Returns: @@ -75,8 +74,7 @@ def get_changefeed(connection, table, operation, *, prefeed=None, """ return MongoDBChangeFeed(table, operation, prefeed=prefeed, - connection=connection, - get_resumption_pointer=get_resumption_pointer) + connection=connection) _FEED_STOP = False diff --git a/bigchaindb/backend/rethinkdb/changefeed.py b/bigchaindb/backend/rethinkdb/changefeed.py index 390ada9a..d0cfd5bb 100644 --- a/bigchaindb/backend/rethinkdb/changefeed.py +++ b/bigchaindb/backend/rethinkdb/changefeed.py @@ -14,22 +14,13 @@ register_changefeed = module_dispatch_registrar(backend.changefeed) class RethinkDBChangeFeed(ChangeFeed): - """This class wraps a RethinkDB changefeed.""" + """This class wraps a RethinkDB changefeed as a multipipes Node.""" def run_forever(self): for element in self.prefeed: self.outqueue.put(element) - while True: - try: - self.run_changefeed() - break - except (BackendError, r.ReqlDriverError) as exc: - logger.exception('Error connecting to the database, retrying') - time.sleep(1) - - def run_changefeed(self): - for change in self.connection.run(r.table(self.table).changes()): + for change in run_changefeed(self.connection, self.table): is_insert = change['old_val'] is None is_delete = change['new_val'] is None is_update = not is_insert and not is_delete @@ -42,6 +33,19 @@ class RethinkDBChangeFeed(ChangeFeed): self.outqueue.put(change['new_val']) +def run_changefeed(connection, table): + """Encapsulate operational logic of tailing changefeed from RethinkDB + """ + while True: + try: + for change in connection.run(r.table(table).changes()): + yield change + break + except (BackendError, r.ReqlDriverError) as exc: + logger.exception('Error connecting to the database, retrying') + time.sleep(1) + + @register_changefeed(RethinkDBConnection) def get_changefeed(connection, table, operation, *, prefeed=None): """Return a RethinkDB changefeed. diff --git a/bigchaindb/backend/rethinkdb/query.py b/bigchaindb/backend/rethinkdb/query.py index deb02244..cc6bb96f 100644 --- a/bigchaindb/backend/rethinkdb/query.py +++ b/bigchaindb/backend/rethinkdb/query.py @@ -1,15 +1,20 @@ from itertools import chain +import logging as logger from time import time import rethinkdb as r from bigchaindb import backend, utils +from bigchaindb.backend.rethinkdb import changefeed from bigchaindb.common import exceptions from bigchaindb.common.transaction import Transaction from bigchaindb.backend.utils import module_dispatch_registrar from bigchaindb.backend.rethinkdb.connection import RethinkDBConnection +logger = logger.getLogger(__name__) + + READ_MODE = 'majority' WRITE_DURABILITY = 'hard' @@ -238,3 +243,12 @@ def get_last_voted_block(connection, node_pubkey): return connection.run( r.table('bigchain', read_mode=READ_MODE) .get(last_block_id)) + + +@register_query(RethinkDBConnection) +def get_new_blocks_feed(connection, start_block_id): + logger.warning('RethinkDB changefeed unable to resume from given block: %s', + start_block_id) + # In order to get blocks in the correct order, it may be acceptable to + # look in the votes table to see what order other nodes have used. + return changefeed.run_changefeed(connection, 'bigchain') diff --git a/bigchaindb/pipelines/vote.py b/bigchaindb/pipelines/vote.py index ee5c3ada..5140a793 100644 --- a/bigchaindb/pipelines/vote.py +++ b/bigchaindb/pipelines/vote.py @@ -167,8 +167,8 @@ def get_changefeed(): """Create and return ordered changefeed of blocks starting from last voted block""" b = Bigchain() - last_voted = b.get_last_voted_block().id - feed = backend.query.get_new_blocks_feed(b.connection, last_voted) + last_block_id = b.get_last_voted_block().id + feed = backend.query.get_new_blocks_feed(b.connection, last_block_id) return Node(feed.__next__) diff --git a/tests/backend/mongodb/test_queries.py b/tests/backend/mongodb/test_queries.py index f58ba464..7df1022a 100644 --- a/tests/backend/mongodb/test_queries.py +++ b/tests/backend/mongodb/test_queries.py @@ -426,5 +426,5 @@ def test_get_new_blocks_feed(b, create_tx): assert feed.__next__() == b2 b3 = create_block() - + assert list(feed) == [b3] diff --git a/tests/pipelines/test_vote.py b/tests/pipelines/test_vote.py index 12f84592..bc8903cb 100644 --- a/tests/pipelines/test_vote.py +++ b/tests/pipelines/test_vote.py @@ -509,45 +509,6 @@ def test_invalid_block_voting(monkeypatch, b, user_pk, genesis_block): vote_doc['signature']) is True -@pytest.mark.genesis -def test_voter_chains_blocks_with_the_previous_ones(monkeypatch, b): - from bigchaindb.backend import query - from bigchaindb.pipelines import vote - - outpipe = Pipe() - - block_ids = [] - block_1 = dummy_block(b) - block_1.timestamp = str(random.random()) - block_ids.append(block_1.id) - b.write_block(block_1) - - block_2 = dummy_block(b) - block_2.timestamp = str(random.random()) - block_ids.append(block_2.id) - b.write_block(block_2) - - vote_pipeline = vote.create_pipeline() - vote_pipeline.setup(indata=vote.get_changefeed(), outdata=outpipe) - vote_pipeline.start() - - # We expects two votes, so instead of waiting an arbitrary amount - # of time, we can do two blocking calls to `get` - outpipe.get() - outpipe.get() - vote_pipeline.terminate() - - # retrive blocks from bigchain - blocks = [b.get_block(_id) for _id in block_ids] - - # retrieve votes - votes = [list(query.get_votes_by_block_id(b.connection, _id))[0] - for _id in block_ids] - - assert ({v['vote']['voting_for_block'] for v in votes} == - {block['id'] for block in blocks}) - - @pytest.mark.genesis def test_voter_checks_for_previous_vote(monkeypatch, b): from bigchaindb.backend import query From 99b477776608dbd30282b27753d6ec057d7752f0 Mon Sep 17 00:00:00 2001 From: Scott Sadler Date: Thu, 13 Apr 2017 11:12:04 +0200 Subject: [PATCH 07/60] use MongoDBConnection.run() and fix changefeed reconnect --- bigchaindb/backend/mongodb/changefeed.py | 5 +++-- bigchaindb/backend/mongodb/query.py | 5 +++-- tests/backend/mongodb/test_changefeed.py | 3 +-- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/bigchaindb/backend/mongodb/changefeed.py b/bigchaindb/backend/mongodb/changefeed.py index 59d53b72..c155bb6f 100644 --- a/bigchaindb/backend/mongodb/changefeed.py +++ b/bigchaindb/backend/mongodb/changefeed.py @@ -89,13 +89,14 @@ def run_changefeed(conn, table, last_ts): try: # XXX: hack to force reconnection, in case the connection # is lost while waiting on the cursor. See #1154. - conn.connection = 1 + conn._conn = None namespace = conn.dbname + '.' + table - cursor = conn.conn.local.oplog.rs.find( + query = conn.query().local.oplog.rs.find( {'ns': namespace, 'ts': {'$gt': last_ts}}, {'o._id': False}, cursor_type=pymongo.CursorType.TAILABLE_AWAIT ) + cursor = conn.run(query) logging.debug('Tailing oplog at %s/%s', namespace, last_ts) while cursor.alive: try: diff --git a/bigchaindb/backend/mongodb/query.py b/bigchaindb/backend/mongodb/query.py index a0d8ed12..17d50f02 100644 --- a/bigchaindb/backend/mongodb/query.py +++ b/bigchaindb/backend/mongodb/query.py @@ -286,8 +286,9 @@ def get_last_voted_block(conn, node_pubkey): @register_query(MongoDBConnection) def get_new_blocks_feed(conn, start_block_id): namespace = conn.dbname + '.bigchain' - query = {'o.id': start_block_id, 'op': 'i', 'ns': namespace} + match = {'o.id': start_block_id, 'op': 'i', 'ns': namespace} # Neccesary to find in descending order since tests may write same block id several times - last_ts = conn.conn.local.oplog.rs.find(query).sort('$natural', -1).next()['ts'] + query = conn.query().local.oplog.rs.find(match).sort('$natural', -1).next()['ts'] + last_ts = conn.run(query) feed = run_changefeed(conn, 'bigchain', last_ts) return (evt['o'] for evt in feed if evt['op'] == 'i') diff --git a/tests/backend/mongodb/test_changefeed.py b/tests/backend/mongodb/test_changefeed.py index cfe50e57..e1d11706 100644 --- a/tests/backend/mongodb/test_changefeed.py +++ b/tests/backend/mongodb/test_changefeed.py @@ -142,8 +142,7 @@ def test_connection_failure(): from bigchaindb.backend.mongodb.changefeed import run_changefeed conn = mock.MagicMock() - find = conn.conn.local.oplog.rs.find - find.side_effect = [ConnectionError(), RuntimeError()] + conn.run.side_effect = [ConnectionError(), RuntimeError()] changefeed = run_changefeed(conn, 'backlog', -1) with pytest.raises(RuntimeError): for record in changefeed: From 4f991227586dffe457c1fc04f4f7eba783bdd3a4 Mon Sep 17 00:00:00 2001 From: Scott Sadler Date: Wed, 19 Apr 2017 15:47:58 +0200 Subject: [PATCH 08/60] fast unspents --- bigchaindb/backend/mongodb/query.py | 31 ++++++++++- bigchaindb/backend/query.py | 28 ++++++++++ bigchaindb/common/transaction.py | 3 + bigchaindb/core.py | 11 ++-- bigchaindb/fastquery.py | 56 +++++++++++++++++++ tests/backend/mongodb/test_queries.py | 51 +++++++++++++++++ tests/db/test_bigchain_api.py | 12 ++-- tests/test_fastquery.py | 80 +++++++++++++++++++++++++++ 8 files changed, 261 insertions(+), 11 deletions(-) create mode 100644 bigchaindb/fastquery.py create mode 100644 tests/test_fastquery.py diff --git a/bigchaindb/backend/mongodb/query.py b/bigchaindb/backend/mongodb/query.py index 74b9c35a..d564e242 100644 --- a/bigchaindb/backend/mongodb/query.py +++ b/bigchaindb/backend/mongodb/query.py @@ -176,13 +176,33 @@ def get_spent(conn, transaction_id, output): @register_query(MongoDBConnection) -def get_owned_ids(conn, owner): +def get_spending_transactions(conn, inputs): + return conn.run( + conn.collection('bigchain').aggregate([ + {'$match': { + 'block.transactions.inputs.fulfills': { + '$in': inputs, + }, + }}, + {'$unwind': '$block.transactions'}, + {'$match': { + 'block.transactions.inputs.fulfills': { + '$in': inputs, + }, + }}, + ])) + + +@register_query(MongoDBConnection) +def get_owned_ids(conn, owner, unwrap=True): cursor = conn.run( conn.collection('bigchain').aggregate([ {'$match': {'block.transactions.outputs.public_keys': owner}}, {'$unwind': '$block.transactions'}, {'$match': {'block.transactions.outputs.public_keys': owner}} ])) + if not unwrap: + return cursor # we need to access some nested fields before returning so lets use a # generator to avoid having to read all records on the cursor at this point return (elem['block']['transactions'] for elem in cursor) @@ -196,6 +216,15 @@ def get_votes_by_block_id(conn, block_id): projection={'_id': False})) +@register_query(MongoDBConnection) +def get_votes_for_blocks_by_voter(conn, block_ids, node_pubkey): + return conn.run( + conn.collection('votes') + .find({'vote.voting_for_block': {'$in': block_ids}, + 'node_pubkey': node_pubkey}, + projection={'_id': False})) + + @register_query(MongoDBConnection) def get_votes_by_block_id_and_voter(conn, block_id, node_pubkey): return conn.run( diff --git a/bigchaindb/backend/query.py b/bigchaindb/backend/query.py index 9aa653d7..5ae39557 100644 --- a/bigchaindb/backend/query.py +++ b/bigchaindb/backend/query.py @@ -140,6 +140,19 @@ def get_spent(connection, transaction_id, condition_id): raise NotImplementedError +@singledispatch +def get_spending_transactions(connection, inputs): + """Return transactions which spend given inputs + + Args: + inputs (list): list of {txid, output} + + Returns: + List of transactions that spend given inputs + """ + raise NotImplementedError + + @singledispatch def get_owned_ids(connection, owner): """Retrieve a list of `txids` that can we used has inputs. @@ -183,6 +196,21 @@ def get_votes_by_block_id_and_voter(connection, block_id, node_pubkey): raise NotImplementedError +@singledispatch +def get_votes_for_blocks_by_voter(connection, block_ids, pubkey): + """Return votes for many block_ids + + Args: + block_ids (set): block_ids + pubkey (str): public key of voting node + + Returns: + A cursor of votes matching given votes. + """ + + raise NotImplementedError + + @singledispatch def write_block(connection, block): """Write a block to the bigchain table. diff --git a/bigchaindb/common/transaction.py b/bigchaindb/common/transaction.py index e956812f..7d17172a 100644 --- a/bigchaindb/common/transaction.py +++ b/bigchaindb/common/transaction.py @@ -161,6 +161,9 @@ class TransactionLink(object): # TODO: If `other !== TransactionLink` return `False` return self.to_dict() == other.to_dict() + def __hash__(self): + return hash((self.txid, self.output)) + @classmethod def from_dict(cls, link): """Transforms a Python dictionary to a TransactionLink object. diff --git a/bigchaindb/core.py b/bigchaindb/core.py index 5d2e9c03..be7cdcbc 100644 --- a/bigchaindb/core.py +++ b/bigchaindb/core.py @@ -8,7 +8,7 @@ from bigchaindb.common.transaction import TransactionLink import bigchaindb -from bigchaindb import backend, config_utils, utils +from bigchaindb import backend, config_utils, utils, fastquery from bigchaindb.consensus import BaseConsensusRules from bigchaindb.models import Block, Transaction @@ -433,14 +433,17 @@ class Bigchain(object): """ return self.get_outputs_filtered(owner, include_spent=False) + @property + def fastquery(self): + return fastquery.FastQuery(self.connection, self.me) + def get_outputs_filtered(self, owner, include_spent=True): """ Get a list of output links filtered on some criteria """ - outputs = self.get_outputs(owner) + outputs = self.fastquery.get_outputs_by_pubkey(owner) if not include_spent: - outputs = [o for o in outputs - if not self.get_spent(o.txid, o.output)] + outputs = self.fastquery.filter_spent_outputs(outputs) return outputs def get_transactions_filtered(self, asset_id, operation=None): diff --git a/bigchaindb/fastquery.py b/bigchaindb/fastquery.py new file mode 100644 index 00000000..f2a8fb09 --- /dev/null +++ b/bigchaindb/fastquery.py @@ -0,0 +1,56 @@ +from bigchaindb.utils import output_has_owner +from bigchaindb.backend import query +from bigchaindb.common.transaction import TransactionLink + + +class FastQuery: + + """ + Database queries that join on block results from a single node. + + * Votes are not validated for security (security is replication concern) + * Votes come from only one node, and as such, fault tolerance is not provided + (elected Blockchain table not yet available) + + In return, these queries offer good performance, as it is not neccesary to validate + each result separately. + """ + + def __init__(self, connection, me): + self.connection = connection + self.me = me + + def filter_block_ids(self, block_ids, include_undecided=True): + votes = query.get_votes_for_blocks_by_voter( + self.connection, block_ids, self.me) + votes = {v['vote']['voting_for_block']: v['vote']['is_block_valid'] for v in votes} + return [b for b in block_ids if votes.get(b, include_undecided)] + + def filter_valid_blocks(self, blocks): + block_ids = list(set(b['id'] for b in blocks)) + valid_block_ids = self.filter_block_ids(block_ids) + return [b for b in blocks if b['id'] in valid_block_ids] + + def get_outputs_by_pubkey(self, pubkey): + cursor = query.get_owned_ids(self.connection, pubkey, unwrap=False) + wrapped_txs = self.filter_valid_blocks(list(cursor)) + txs = [wrapped['block']['transactions'] for wrapped in wrapped_txs] + return [TransactionLink(tx['id'], i) + for tx in txs + for i, o in enumerate(tx['outputs']) + if output_has_owner(o, pubkey)] + + def filter_spent_outputs(self, outputs): + """ + Remove outputs that have been spent + + Args: + outputs: list of TransactionLink + """ + links = [o.to_dict() for o in outputs] + wrapped = self.filter_valid_blocks( + list(query.get_spending_transactions(self.connection, links))) + spends = {TransactionLink.from_dict(input_['fulfills']) + for block in wrapped + for input_ in block['block']['transactions']['inputs']} + return [ff for ff in outputs if ff not in spends] diff --git a/tests/backend/mongodb/test_queries.py b/tests/backend/mongodb/test_queries.py index bd7e75f1..c8a03011 100644 --- a/tests/backend/mongodb/test_queries.py +++ b/tests/backend/mongodb/test_queries.py @@ -417,3 +417,54 @@ def test_get_txids_filtered(signed_create_tx, signed_transfer_tx): # Test get by asset and TRANSFER txids = set(query.get_txids_filtered(conn, asset_id, Transaction.TRANSFER)) assert txids == {signed_transfer_tx.id} + + +def test_get_spending_transactions(user_pk): + from bigchaindb.backend import connect, query + from bigchaindb.models import Block, Transaction + conn = connect() + + out = [([user_pk], 1)] + tx1 = Transaction.create([user_pk], out * 3) + inputs = tx1.to_inputs() + tx2 = Transaction.transfer([inputs[0]], out, tx1.id) + tx3 = Transaction.transfer([inputs[1]], out, tx1.id) + tx4 = Transaction.transfer([inputs[2]], out, tx1.id) + block = Block([tx1, tx2, tx3, tx4]) + conn.db.bigchain.insert_one(block.to_dict()) + + links = [inputs[0].fulfills.to_dict(), inputs[2].fulfills.to_dict()] + # discard block noise + res = [(r['id'], r['block']['transactions']) + for r in list(query.get_spending_transactions(conn, links))] + + # tx3 not a member because input 1 not asked for + assert res == [(block.id, tx2.to_dict()), (block.id, tx4.to_dict())] + + +def test_get_votes_for_blocks_by_voter(): + from bigchaindb.backend import connect, query + + conn = connect() + votes = [ + { + 'node_pubkey': 'a', + 'vote': {'voting_for_block': 'block1'}, + }, + { + 'node_pubkey': 'b', + 'vote': {'voting_for_block': 'block1'}, + }, + { + 'node_pubkey': 'a', + 'vote': {'voting_for_block': 'block2'}, + }, + { + 'node_pubkey': 'a', + 'vote': {'voting_for_block': 'block3'}, + } + ] + for vote in votes: + conn.db.votes.insert_one(vote.copy()) + res = query.get_votes_for_blocks_by_voter(conn, ['block1', 'block2'], 'a') + assert list(res) == [votes[0], votes[2]] diff --git a/tests/db/test_bigchain_api.py b/tests/db/test_bigchain_api.py index 3f05385c..589671e5 100644 --- a/tests/db/test_bigchain_api.py +++ b/tests/db/test_bigchain_api.py @@ -1119,11 +1119,11 @@ def test_get_owned_ids_calls_get_outputs_filtered(): def test_get_outputs_filtered_only_unspent(): from bigchaindb.common.transaction import TransactionLink from bigchaindb.core import Bigchain - with patch('bigchaindb.core.Bigchain.get_outputs') as get_outputs: + with patch('bigchaindb.fastquery.FastQuery.get_outputs_by_pubkey') as get_outputs: get_outputs.return_value = [TransactionLink('a', 1), TransactionLink('b', 2)] - with patch('bigchaindb.core.Bigchain.get_spent') as get_spent: - get_spent.side_effect = [True, False] + with patch('bigchaindb.fastquery.FastQuery.filter_spent_outputs') as filter_spent: + filter_spent.return_value = [TransactionLink('b', 2)] out = Bigchain().get_outputs_filtered('abc', include_spent=False) get_outputs.assert_called_once_with('abc') assert out == [TransactionLink('b', 2)] @@ -1132,13 +1132,13 @@ def test_get_outputs_filtered_only_unspent(): def test_get_outputs_filtered(): from bigchaindb.common.transaction import TransactionLink from bigchaindb.core import Bigchain - with patch('bigchaindb.core.Bigchain.get_outputs') as get_outputs: + with patch('bigchaindb.fastquery.FastQuery.get_outputs_by_pubkey') as get_outputs: get_outputs.return_value = [TransactionLink('a', 1), TransactionLink('b', 2)] - with patch('bigchaindb.core.Bigchain.get_spent') as get_spent: + with patch('bigchaindb.fastquery.FastQuery.filter_spent_outputs') as filter_spent: out = Bigchain().get_outputs_filtered('abc') get_outputs.assert_called_once_with('abc') - get_spent.assert_not_called() + filter_spent.assert_not_called() assert out == get_outputs.return_value diff --git a/tests/test_fastquery.py b/tests/test_fastquery.py new file mode 100644 index 00000000..b730eee9 --- /dev/null +++ b/tests/test_fastquery.py @@ -0,0 +1,80 @@ +import pytest + +from bigchaindb.common.transaction import TransactionLink +from bigchaindb.models import Block, Transaction + +pytestmark = pytest.mark.bdb + + +@pytest.fixture +def blockdata(b, user_pk, user2_pk): + txs = [Transaction.create([user_pk], [([user2_pk], 1)]), + Transaction.create([user2_pk], [([user_pk], 1)]), + Transaction.create([user_pk], [([user_pk], 1), ([user2_pk], 1)])] + blocks = [] + for i in range(3): + block = Block([txs[i]]) + b.write_block(block) + blocks.append(block.to_dict()) + if i > 0: + b.write_vote(b.vote(block.id, '', i % 2 == 1)) + return blocks, [b['id'] for b in blocks] + + +def test_filter_block_ids_with_undecided(b, blockdata): + blocks, block_ids = blockdata + valid_ids = b.fastquery.filter_block_ids(block_ids) + assert set(valid_ids) == {blocks[0]['id'], blocks[1]['id']} + + +def test_filter_block_ids_only_valid(b, blockdata): + blocks, block_ids = blockdata + valid_ids = b.fastquery.filter_block_ids(block_ids, include_undecided=False) + assert set(valid_ids) == {blocks[1]['id']} + + +def test_filter_valid_blocks(b, blockdata): + blocks, _ = blockdata + assert b.fastquery.filter_valid_blocks(blocks) == [blocks[0], blocks[1]] + + +def test_get_outputs_by_pubkey(b, user_pk, user2_pk, blockdata): + blocks, _ = blockdata + assert b.fastquery.get_outputs_by_pubkey(user_pk) == [ + TransactionLink(blocks[1]['block']['transactions'][0]['id'], 0) + ] + assert b.fastquery.get_outputs_by_pubkey(user2_pk) == [ + TransactionLink(blocks[0]['block']['transactions'][0]['id'], 0) + ] + + +def test_filter_spent_outputs(b, user_pk): + out = [([user_pk], 1)] + tx1 = Transaction.create([user_pk], out * 3) + inputs = tx1.to_inputs() + tx2 = Transaction.transfer([inputs[0]], out, tx1.id) + tx3 = Transaction.transfer([inputs[1]], out, tx1.id) + tx4 = Transaction.transfer([inputs[2]], out, tx1.id) + + for tx in [tx1, tx2]: + block = Block([tx]) + b.write_block(block) + b.write_vote(b.vote(block.id, '', True)) + + # mark invalid + block = Block([tx3]) + b.write_block(block) + b.write_vote(b.vote(block.id, '', False)) + + # undecided + block = Block([tx4]) + b.write_block(block) + + unspents = b.fastquery.filter_spent_outputs( + b.fastquery.get_outputs_by_pubkey(user_pk)) + + assert set(unspents) == { + inputs[1].fulfills, + tx2.to_inputs()[0].fulfills, + tx4.to_inputs()[0].fulfills + } From 5b6fa13d798a6bed27fc662f1db6b2a11e7ef885 Mon Sep 17 00:00:00 2001 From: Scott Sadler Date: Thu, 20 Apr 2017 16:58:29 +0200 Subject: [PATCH 09/60] fast unspent queries for RethinkDB --- bigchaindb/backend/rethinkdb/query.py | 38 +++++++++++++++++++++++---- bigchaindb/fastquery.py | 4 +-- tests/test_fastquery.py | 13 ++++++--- 3 files changed, 44 insertions(+), 11 deletions(-) diff --git a/bigchaindb/backend/rethinkdb/query.py b/bigchaindb/backend/rethinkdb/query.py index 6011cc8c..6a2c644e 100644 --- a/bigchaindb/backend/rethinkdb/query.py +++ b/bigchaindb/backend/rethinkdb/query.py @@ -120,14 +120,16 @@ def get_spent(connection, transaction_id, output): @register_query(RethinkDBConnection) -def get_owned_ids(connection, owner): - return connection.run( - r.table('bigchain', read_mode=READ_MODE) +def get_owned_ids(connection, owner, unwrap=True): + query = (r.table('bigchain', read_mode=READ_MODE) .get_all(owner, index='outputs') .distinct() - .concat_map(lambda doc: doc['block']['transactions']) - .filter(lambda tx: tx['outputs'].contains( + .concat_map(unroll_block_transactions) + .filter(lambda doc: doc['block']['transactions']['outputs'].contains( lambda c: c['public_keys'].contains(owner)))) + if unwrap: + query = query.map(lambda doc: doc['block']['transactions']) + return connection.run(query) @register_query(RethinkDBConnection) @@ -253,3 +255,29 @@ def get_unvoted_blocks(connection, node_pubkey): # database level. Solving issue #444 can help untangling the situation unvoted_blocks = filter(lambda block: not utils.is_genesis_block(block), unvoted) return unvoted_blocks + + +@register_query(RethinkDBConnection) +def get_votes_for_blocks_by_voter(connection, block_ids, node_pubkey): + return connection.run( + r.table('votes') + .filter(lambda row: r.expr(block_ids).contains(row['vote']['voting_for_block'])) + .filter(lambda row: row['node_pubkey'] == node_pubkey)) + + +def unroll_block_transactions(block): + """ Simulate unrolling a transaction into block in MongoDB """ + return block['block']['transactions'].map( + lambda tx: block.merge({'block': {'transactions': tx}})) + + +@register_query(RethinkDBConnection) +def get_spending_transactions(connection, links): + query = ( + r.table('bigchain') + .get_all(*[(l['txid'], l['output']) for l in links], index='inputs') + .concat_map(unroll_block_transactions) + .filter(lambda doc: r.expr(links).set_intersection( + doc['block']['transactions']['inputs'].map(lambda i: i['fulfills']))) + ) + return connection.run(query) diff --git a/bigchaindb/fastquery.py b/bigchaindb/fastquery.py index f2a8fb09..7b73ab64 100644 --- a/bigchaindb/fastquery.py +++ b/bigchaindb/fastquery.py @@ -48,8 +48,8 @@ class FastQuery: outputs: list of TransactionLink """ links = [o.to_dict() for o in outputs] - wrapped = self.filter_valid_blocks( - list(query.get_spending_transactions(self.connection, links))) + spending_txs = query.get_spending_transactions(self.connection, links) + wrapped = self.filter_valid_blocks(list(spending_txs)) spends = {TransactionLink.from_dict(input_['fulfills']) for block in wrapped for input_ in block['block']['transactions']['inputs']} diff --git a/tests/test_fastquery.py b/tests/test_fastquery.py index b730eee9..8621751a 100644 --- a/tests/test_fastquery.py +++ b/tests/test_fastquery.py @@ -51,27 +51,32 @@ def test_get_outputs_by_pubkey(b, user_pk, user2_pk, blockdata): def test_filter_spent_outputs(b, user_pk): out = [([user_pk], 1)] tx1 = Transaction.create([user_pk], out * 3) + + # There are 3 inputs inputs = tx1.to_inputs() + + # Each spent individually tx2 = Transaction.transfer([inputs[0]], out, tx1.id) tx3 = Transaction.transfer([inputs[1]], out, tx1.id) tx4 = Transaction.transfer([inputs[2]], out, tx1.id) + # The CREATE and first TRANSFER are valid. tx2 produces a new unspent. for tx in [tx1, tx2]: block = Block([tx]) b.write_block(block) b.write_vote(b.vote(block.id, '', True)) - # mark invalid + # The second TRANSFER is invalid. inputs[1] remains unspent. block = Block([tx3]) b.write_block(block) b.write_vote(b.vote(block.id, '', False)) - # undecided + # The third TRANSFER is undecided. It procuces a new unspent. block = Block([tx4]) b.write_block(block) - unspents = b.fastquery.filter_spent_outputs( - b.fastquery.get_outputs_by_pubkey(user_pk)) + outputs = b.fastquery.get_outputs_by_pubkey(user_pk) + unspents = b.fastquery.filter_spent_outputs(outputs) assert set(unspents) == { inputs[1].fulfills, From 29247a9994758e2d511a2f686abb3cc4b8f94e54 Mon Sep 17 00:00:00 2001 From: Scott Sadler Date: Thu, 20 Apr 2017 16:58:40 +0200 Subject: [PATCH 10/60] test clarification --- tests/test_fastquery.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_fastquery.py b/tests/test_fastquery.py index 8621751a..200bd7f9 100644 --- a/tests/test_fastquery.py +++ b/tests/test_fastquery.py @@ -16,8 +16,8 @@ def blockdata(b, user_pk, user2_pk): block = Block([txs[i]]) b.write_block(block) blocks.append(block.to_dict()) - if i > 0: - b.write_vote(b.vote(block.id, '', i % 2 == 1)) + b.write_vote(b.vote(blocks[1]['id'], '', True)) + b.write_vote(b.vote(blocks[2]['id'], '', False)) return blocks, [b['id'] for b in blocks] From 5d767c1162effa3f657f5d6c5c5a3594bdc11662 Mon Sep 17 00:00:00 2001 From: Scott Sadler Date: Fri, 21 Apr 2017 12:23:53 +0200 Subject: [PATCH 11/60] light refactoring to remove some redundancies and fix test coverage --- bigchaindb/backend/mongodb/query.py | 11 +++--- bigchaindb/backend/rethinkdb/query.py | 19 +++++------ bigchaindb/core.py | 48 +-------------------------- bigchaindb/fastquery.py | 32 ++++++++++-------- tests/backend/mongodb/test_queries.py | 9 +++-- tests/backend/test_generics.py | 2 ++ tests/test_fastquery.py | 3 +- 7 files changed, 41 insertions(+), 83 deletions(-) diff --git a/bigchaindb/backend/mongodb/query.py b/bigchaindb/backend/mongodb/query.py index d564e242..0eab8c6b 100644 --- a/bigchaindb/backend/mongodb/query.py +++ b/bigchaindb/backend/mongodb/query.py @@ -177,7 +177,7 @@ def get_spent(conn, transaction_id, output): @register_query(MongoDBConnection) def get_spending_transactions(conn, inputs): - return conn.run( + cursor = conn.run( conn.collection('bigchain').aggregate([ {'$match': { 'block.transactions.inputs.fulfills': { @@ -191,21 +191,18 @@ def get_spending_transactions(conn, inputs): }, }}, ])) + return ((b['id'], b['block']['transactions']) for b in cursor) @register_query(MongoDBConnection) -def get_owned_ids(conn, owner, unwrap=True): +def get_owned_ids(conn, owner): cursor = conn.run( conn.collection('bigchain').aggregate([ {'$match': {'block.transactions.outputs.public_keys': owner}}, {'$unwind': '$block.transactions'}, {'$match': {'block.transactions.outputs.public_keys': owner}} ])) - if not unwrap: - return cursor - # we need to access some nested fields before returning so lets use a - # generator to avoid having to read all records on the cursor at this point - return (elem['block']['transactions'] for elem in cursor) + return ((b['id'], b['block']['transactions']) for b in cursor) @register_query(MongoDBConnection) diff --git a/bigchaindb/backend/rethinkdb/query.py b/bigchaindb/backend/rethinkdb/query.py index 6a2c644e..fca65bdf 100644 --- a/bigchaindb/backend/rethinkdb/query.py +++ b/bigchaindb/backend/rethinkdb/query.py @@ -120,16 +120,15 @@ def get_spent(connection, transaction_id, output): @register_query(RethinkDBConnection) -def get_owned_ids(connection, owner, unwrap=True): +def get_owned_ids(connection, owner): query = (r.table('bigchain', read_mode=READ_MODE) .get_all(owner, index='outputs') .distinct() .concat_map(unroll_block_transactions) - .filter(lambda doc: doc['block']['transactions']['outputs'].contains( + .filter(lambda doc: doc['tx']['outputs'].contains( lambda c: c['public_keys'].contains(owner)))) - if unwrap: - query = query.map(lambda doc: doc['block']['transactions']) - return connection.run(query) + cursor = connection.run(query) + return ((b['id'], b['tx']) for b in cursor) @register_query(RethinkDBConnection) @@ -266,9 +265,8 @@ def get_votes_for_blocks_by_voter(connection, block_ids, node_pubkey): def unroll_block_transactions(block): - """ Simulate unrolling a transaction into block in MongoDB """ - return block['block']['transactions'].map( - lambda tx: block.merge({'block': {'transactions': tx}})) + """ Unroll block transactions """ + return block['block']['transactions'].map(lambda tx: block.merge({'tx': tx})) @register_query(RethinkDBConnection) @@ -278,6 +276,7 @@ def get_spending_transactions(connection, links): .get_all(*[(l['txid'], l['output']) for l in links], index='inputs') .concat_map(unroll_block_transactions) .filter(lambda doc: r.expr(links).set_intersection( - doc['block']['transactions']['inputs'].map(lambda i: i['fulfills']))) + doc['tx']['inputs'].map(lambda i: i['fulfills']))) ) - return connection.run(query) + cursor = connection.run(query) + return ((b['id'], b['tx']) for b in cursor) diff --git a/bigchaindb/core.py b/bigchaindb/core.py index be7cdcbc..63cab1d2 100644 --- a/bigchaindb/core.py +++ b/bigchaindb/core.py @@ -4,11 +4,10 @@ from time import time from bigchaindb import exceptions as core_exceptions from bigchaindb.common import crypto, exceptions from bigchaindb.common.utils import gen_timestamp, serialize -from bigchaindb.common.transaction import TransactionLink import bigchaindb -from bigchaindb import backend, config_utils, utils, fastquery +from bigchaindb import backend, config_utils, fastquery from bigchaindb.consensus import BaseConsensusRules from bigchaindb.models import Block, Transaction @@ -376,51 +375,6 @@ class Bigchain(object): # Either no transaction was returned spending the `(txid, output)` as # input or the returned transactions are not valid. - def get_outputs(self, owner): - """Retrieve a list of links to transaction outputs for a given public - key. - - Args: - owner (str): base58 encoded public key. - - Returns: - :obj:`list` of TransactionLink: list of ``txid`` s and ``output`` s - pointing to another transaction's condition - """ - # get all transactions in which owner is in the `owners_after` list - response = backend.query.get_owned_ids(self.connection, owner) - return [ - TransactionLink(tx['id'], index) - for tx in response - if not self.is_tx_strictly_in_invalid_block(tx['id']) - for index, output in enumerate(tx['outputs']) - if utils.output_has_owner(output, owner) - ] - - def is_tx_strictly_in_invalid_block(self, txid): - """ - Checks whether the transaction with the given ``txid`` - *strictly* belongs to an invalid block. - - Args: - txid (str): Transaction id. - - Returns: - bool: ``True`` if the transaction *strictly* belongs to a - block that is invalid. ``False`` otherwise. - - Note: - Since a transaction may be in multiple blocks, with - different statuses, the term "strictly" is used to - emphasize that if a transaction is said to be in an invalid - block, it means that it is not in any other block that is - either valid or undecided. - - """ - validity = self.get_blocks_status_containing_tx(txid) - return (Bigchain.BLOCK_VALID not in validity.values() and - Bigchain.BLOCK_UNDECIDED not in validity.values()) - def get_owned_ids(self, owner): """Retrieve a list of ``txid`` s that can be used as inputs. diff --git a/bigchaindb/fastquery.py b/bigchaindb/fastquery.py index 7b73ab64..a7147722 100644 --- a/bigchaindb/fastquery.py +++ b/bigchaindb/fastquery.py @@ -9,8 +9,7 @@ class FastQuery: Database queries that join on block results from a single node. * Votes are not validated for security (security is replication concern) - * Votes come from only one node, and as such, fault tolerance is not provided - (elected Blockchain table not yet available) + * Votes come from only one node, and as such, fault tolerance is reduced In return, these queries offer good performance, as it is not neccesary to validate each result separately. @@ -21,20 +20,27 @@ class FastQuery: self.me = me def filter_block_ids(self, block_ids, include_undecided=True): + """ + Given block ids, filter the invalid blocks + """ + block_ids = list(set(block_ids)) votes = query.get_votes_for_blocks_by_voter( self.connection, block_ids, self.me) votes = {v['vote']['voting_for_block']: v['vote']['is_block_valid'] for v in votes} return [b for b in block_ids if votes.get(b, include_undecided)] - def filter_valid_blocks(self, blocks): - block_ids = list(set(b['id'] for b in blocks)) - valid_block_ids = self.filter_block_ids(block_ids) - return [b for b in blocks if b['id'] in valid_block_ids] + def filter_valid_blocks(self, blocks, key=lambda b: b[0]): + """ + Given things with block ids, remove the invalid ones. + """ + blocks = list(blocks) + valid_block_ids = set(self.filter_block_ids(key(b) for b in blocks)) + return [b for b in blocks if key(b) in valid_block_ids] def get_outputs_by_pubkey(self, pubkey): - cursor = query.get_owned_ids(self.connection, pubkey, unwrap=False) - wrapped_txs = self.filter_valid_blocks(list(cursor)) - txs = [wrapped['block']['transactions'] for wrapped in wrapped_txs] + """ Get outputs for a public key """ + res = list(query.get_owned_ids(self.connection, pubkey)) + txs = [tx for _, tx in self.filter_valid_blocks(res)] return [TransactionLink(tx['id'], i) for tx in txs for i, o in enumerate(tx['outputs']) @@ -48,9 +54,9 @@ class FastQuery: outputs: list of TransactionLink """ links = [o.to_dict() for o in outputs] - spending_txs = query.get_spending_transactions(self.connection, links) - wrapped = self.filter_valid_blocks(list(spending_txs)) + res = query.get_spending_transactions(self.connection, links) + txs = [tx for _, tx in self.filter_valid_blocks(res)] spends = {TransactionLink.from_dict(input_['fulfills']) - for block in wrapped - for input_ in block['block']['transactions']['inputs']} + for tx in txs + for input_ in tx['inputs']} return [ff for ff in outputs if ff not in spends] diff --git a/tests/backend/mongodb/test_queries.py b/tests/backend/mongodb/test_queries.py index c8a03011..426ac12d 100644 --- a/tests/backend/mongodb/test_queries.py +++ b/tests/backend/mongodb/test_queries.py @@ -205,10 +205,10 @@ def test_get_owned_ids(signed_create_tx, user_pk): block = Block(transactions=[signed_create_tx]) conn.db.bigchain.insert_one(block.to_dict()) - owned_ids = list(query.get_owned_ids(conn, user_pk)) + [(block_id, tx)] = list(query.get_owned_ids(conn, user_pk)) - assert len(owned_ids) == 1 - assert owned_ids[0] == signed_create_tx.to_dict() + assert block_id == block.id + assert tx == signed_create_tx.to_dict() def test_get_votes_by_block_id(signed_create_tx, structurally_valid_vote): @@ -435,8 +435,7 @@ def test_get_spending_transactions(user_pk): links = [inputs[0].fulfills.to_dict(), inputs[2].fulfills.to_dict()] # discard block noise - res = [(r['id'], r['block']['transactions']) - for r in list(query.get_spending_transactions(conn, links))] + res = list(query.get_spending_transactions(conn, links)) # tx3 not a member because input 1 not asked for assert res == [(block.id, tx2.to_dict()), (block.id, tx4.to_dict())] diff --git a/tests/backend/test_generics.py b/tests/backend/test_generics.py index 57a644ee..581a0b31 100644 --- a/tests/backend/test_generics.py +++ b/tests/backend/test_generics.py @@ -36,6 +36,8 @@ def test_schema(schema_func_name, args_qty): ('get_votes_by_block_id_and_voter', 2), ('update_transaction', 2), ('get_transaction_from_block', 2), + ('get_votes_for_blocks_by_voter', 2), + ('get_spending_transactions', 1), )) def test_query(query_func_name, args_qty): from bigchaindb.backend import query diff --git a/tests/test_fastquery.py b/tests/test_fastquery.py index 200bd7f9..c54ef10e 100644 --- a/tests/test_fastquery.py +++ b/tests/test_fastquery.py @@ -35,7 +35,8 @@ def test_filter_block_ids_only_valid(b, blockdata): def test_filter_valid_blocks(b, blockdata): blocks, _ = blockdata - assert b.fastquery.filter_valid_blocks(blocks) == [blocks[0], blocks[1]] + assert (b.fastquery.filter_valid_blocks(blocks, key=lambda b: b['id']) + == [blocks[0], blocks[1]]) def test_get_outputs_by_pubkey(b, user_pk, user2_pk, blockdata): From 2200a7bda417e4d5ef4fd1f3297e7231249dbb85 Mon Sep 17 00:00:00 2001 From: Scott Sadler Date: Mon, 24 Apr 2017 12:21:00 +0200 Subject: [PATCH 12/60] cleanup --- bigchaindb/backend/query.py | 1 - bigchaindb/backend/rethinkdb/query.py | 9 +++++---- tests/backend/mongodb/test_queries.py | 1 - 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/bigchaindb/backend/query.py b/bigchaindb/backend/query.py index 5ae39557..0f746132 100644 --- a/bigchaindb/backend/query.py +++ b/bigchaindb/backend/query.py @@ -207,7 +207,6 @@ def get_votes_for_blocks_by_voter(connection, block_ids, pubkey): Returns: A cursor of votes matching given votes. """ - raise NotImplementedError diff --git a/bigchaindb/backend/rethinkdb/query.py b/bigchaindb/backend/rethinkdb/query.py index fca65bdf..18116fc1 100644 --- a/bigchaindb/backend/rethinkdb/query.py +++ b/bigchaindb/backend/rethinkdb/query.py @@ -124,7 +124,7 @@ def get_owned_ids(connection, owner): query = (r.table('bigchain', read_mode=READ_MODE) .get_all(owner, index='outputs') .distinct() - .concat_map(unroll_block_transactions) + .concat_map(unwind_block_transactions) .filter(lambda doc: doc['tx']['outputs'].contains( lambda c: c['public_keys'].contains(owner)))) cursor = connection.run(query) @@ -264,8 +264,8 @@ def get_votes_for_blocks_by_voter(connection, block_ids, node_pubkey): .filter(lambda row: row['node_pubkey'] == node_pubkey)) -def unroll_block_transactions(block): - """ Unroll block transactions """ +def unwind_block_transactions(block): + """ Yield a block for each transaction in given block """ return block['block']['transactions'].map(lambda tx: block.merge({'tx': tx})) @@ -274,7 +274,8 @@ def get_spending_transactions(connection, links): query = ( r.table('bigchain') .get_all(*[(l['txid'], l['output']) for l in links], index='inputs') - .concat_map(unroll_block_transactions) + .concat_map(unwind_block_transactions) + # filter transactions spending output .filter(lambda doc: r.expr(links).set_intersection( doc['tx']['inputs'].map(lambda i: i['fulfills']))) ) diff --git a/tests/backend/mongodb/test_queries.py b/tests/backend/mongodb/test_queries.py index 426ac12d..0c295b9b 100644 --- a/tests/backend/mongodb/test_queries.py +++ b/tests/backend/mongodb/test_queries.py @@ -434,7 +434,6 @@ def test_get_spending_transactions(user_pk): conn.db.bigchain.insert_one(block.to_dict()) links = [inputs[0].fulfills.to_dict(), inputs[2].fulfills.to_dict()] - # discard block noise res = list(query.get_spending_transactions(conn, links)) # tx3 not a member because input 1 not asked for From 01ba01083d26ba2c1d22eeedd4089a718f9589a9 Mon Sep 17 00:00:00 2001 From: Scott Sadler Date: Tue, 25 Apr 2017 13:12:32 +0200 Subject: [PATCH 13/60] update comments --- bigchaindb/backend/query.py | 9 +++++---- bigchaindb/fastquery.py | 17 ++++++++++------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/bigchaindb/backend/query.py b/bigchaindb/backend/query.py index 0f746132..6f76249b 100644 --- a/bigchaindb/backend/query.py +++ b/bigchaindb/backend/query.py @@ -148,7 +148,8 @@ def get_spending_transactions(connection, inputs): inputs (list): list of {txid, output} Returns: - List of transactions that spend given inputs + Iterator of (block_ids, transaction) for transactions that + spend given inputs. """ raise NotImplementedError @@ -161,9 +162,9 @@ def get_owned_ids(connection, owner): owner (str): base58 encoded public key. Returns: - A cursor for the matching transactions. + Iterator of (block_id, transaction) for transactions + that list given owner in conditions. """ - raise NotImplementedError @@ -205,7 +206,7 @@ def get_votes_for_blocks_by_voter(connection, block_ids, pubkey): pubkey (str): public key of voting node Returns: - A cursor of votes matching given votes. + A cursor of votes matching given block_ids and public key """ raise NotImplementedError diff --git a/bigchaindb/fastquery.py b/bigchaindb/fastquery.py index a7147722..deffabe1 100644 --- a/bigchaindb/fastquery.py +++ b/bigchaindb/fastquery.py @@ -4,24 +4,27 @@ from bigchaindb.common.transaction import TransactionLink class FastQuery: - """ Database queries that join on block results from a single node. - * Votes are not validated for security (security is replication concern) - * Votes come from only one node, and as such, fault tolerance is reduced + * Votes are not validated for security (security is a replication concern) + * Votes come from only one node, and as such, non-byzantine fault tolerance + is reduced. - In return, these queries offer good performance, as it is not neccesary to validate - each result separately. + Previously, to consider the status of a block, all votes for that block + were retrieved and the election results were counted. This meant that a + faulty node may still have been able to obtain a correct election result. + However, from the point of view of a client, it is still neccesary to + query multiple nodes to insure against getting an incorrect response from + a byzantine node. """ - def __init__(self, connection, me): self.connection = connection self.me = me def filter_block_ids(self, block_ids, include_undecided=True): """ - Given block ids, filter the invalid blocks + Given block ids, filter the invalid blocks. """ block_ids = list(set(block_ids)) votes = query.get_votes_for_blocks_by_voter( From a44523081a8ce3e6ecfaedb9a268ba781b8a8c84 Mon Sep 17 00:00:00 2001 From: KURO1 <412681778@qq.com> Date: Fri, 28 Apr 2017 10:39:03 +0800 Subject: [PATCH 14/60] Fix typo in docs --- docs/root/source/bft.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/root/source/bft.md b/docs/root/source/bft.md index 0255c7b5..cde6834a 100644 --- a/docs/root/source/bft.md +++ b/docs/root/source/bft.md @@ -4,7 +4,7 @@ We have Byzantine fault tolerance (BFT) in our roadmap, as a switch that people Among the big, industry-used distributed databases in production today (e.g. DynamoDB, Bigtable, MongoDB, Cassandra, Elasticsearch), none of them are BFT. Indeed, almost all wide-area distributed systems in production are not BFT, including military, banking, healthcare, and other security-sensitive systems. -The are many more practical things that nodes can do to increase security (e.g. firewalls, key management, access controls). +There are many more practical things that nodes can do to increase security (e.g. firewalls, key management, access controls). From a [recent essay by Ken Birman](http://sigops.org/sosp/sosp15/history/05-birman.pdf) (of Cornell): From 3564e25d3f5eb85d41a85658d12682156ab07624 Mon Sep 17 00:00:00 2001 From: diminator Date: Mon, 1 May 2017 13:17:58 +0200 Subject: [PATCH 15/60] check output conditions with single statement --- bigchaindb/utils.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/bigchaindb/utils.py b/bigchaindb/utils.py index f87916b7..4dad9377 100644 --- a/bigchaindb/utils.py +++ b/bigchaindb/utils.py @@ -114,16 +114,11 @@ def condition_details_has_owner(condition_details, owner): def output_has_owner(output, owner): - # TODO - # Check whether it is really necessary to treat the single key case - # differently from the multiple keys case, and why not just use the same - # function for both cases. - if len(output['public_keys']) > 1: + if len(output['public_keys']) > 0: return condition_details_has_owner( output['condition']['details'], owner) - elif len(output['public_keys']) == 1: - return output['condition']['details']['public_key'] == owner - # TODO raise proper exception, e.g. invalid tx payload? + return False + # # TODO raise proper exception, e.g. invalid tx payload? def is_genesis_block(block): From 07a9e69933d61e7f5c4d25e64275645e3e7d6452 Mon Sep 17 00:00:00 2001 From: Krish Date: Tue, 2 May 2017 12:20:27 +0200 Subject: [PATCH 16/60] WebSocket support in NGINX (#1439) * Open port in Dockerfile * Added the BIGCHAINDB_WS_FRONTEND_PORT and BIGCHAINDB_WS_BACKEND_PORT as parameters. --- k8s/nginx/container/Dockerfile | 2 +- k8s/nginx/container/README.md | 9 +++++++++ k8s/nginx/container/nginx.conf.template | 20 ++++++++++++++++++++ k8s/nginx/container/nginx_entrypoint.bash | 6 ++++++ 4 files changed, 36 insertions(+), 1 deletion(-) diff --git a/k8s/nginx/container/Dockerfile b/k8s/nginx/container/Dockerfile index c6c4dd3f..04c69e84 100644 --- a/k8s/nginx/container/Dockerfile +++ b/k8s/nginx/container/Dockerfile @@ -7,5 +7,5 @@ RUN apt-get update \ && apt-get clean COPY nginx.conf.template /etc/nginx/nginx.conf COPY nginx_entrypoint.bash / -EXPOSE 80 443 27017 +EXPOSE 80 81 443 444 27017 ENTRYPOINT ["/nginx_entrypoint.bash"] diff --git a/k8s/nginx/container/README.md b/k8s/nginx/container/README.md index 30f42bfe..5a1e1273 100644 --- a/k8s/nginx/container/README.md +++ b/k8s/nginx/container/README.md @@ -40,6 +40,8 @@ docker run \ --env "BIGCHAINDB_FRONTEND_PORT=" \ --env "BIGCHAINDB_BACKEND_HOST=" \ --env "BIGCHAINDB_BACKEND_PORT=" \ +--env "BIGCHAINDB_WS_BACKEND_PORT=" \ +--env "BIGCHAINDB_WS_FRONTEND_PORT=" \ --env "MONGODB_WHITELIST=" \ --env "DNS_SERVER=" \ --name=ngx \ @@ -59,6 +61,8 @@ docker run \ --env "BIGCHAINDB_FRONTEND_PORT=80" \ --env "BIGCHAINDB_BACKEND_HOST=localhost" \ --env "BIGCHAINDB_BACKEND_PORT=9984" \ +--env="BIGCHAINDB_WS_FRONTEND_PORT=81" \ +--env="BIGCHAINDB_WS_BACKEND_PORT=9985" \ --env "MONGODB_WHITELIST=192.168.0.0/16:10.0.2.0/24" \ --name=ngx \ --publish=80:80 \ @@ -67,3 +71,8 @@ docker run \ bigchaindb/nginx ``` +### Note: +You can test the WebSocket server by using +[wsc](https://slack-redir.net/link?url=https%3A%2F%2Fwww.npmjs.com%2Fpackage%2Fwsc) tool with a command like: +`wsc -er ws://localhost:9985/api/v1/streams/valid_tx`. + diff --git a/k8s/nginx/container/nginx.conf.template b/k8s/nginx/container/nginx.conf.template index 6167dceb..bae805a5 100644 --- a/k8s/nginx/container/nginx.conf.template +++ b/k8s/nginx/container/nginx.conf.template @@ -63,6 +63,26 @@ http { proxy_pass http://$bdb_backend:BIGCHAINDB_BACKEND_PORT; } } + + server { + listen BIGCHAINDB_WS_FRONTEND_PORT; + underscores_in_headers on; + + # keepalive connection settings + keepalive_timeout 20s; + + # `slowloris` attack mitigation settings + client_body_timeout 10s; + client_header_timeout 10s; + + location /api/v1/streams/ { + proxy_read_timeout 300s; + proxy_pass http://$bdb_backend:BIGCHAINDB_WS_BACKEND_PORT; + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + } + } } # NGINX stream block for TCP and UDP proxies diff --git a/k8s/nginx/container/nginx_entrypoint.bash b/k8s/nginx/container/nginx_entrypoint.bash index e40d89f4..49578433 100755 --- a/k8s/nginx/container/nginx_entrypoint.bash +++ b/k8s/nginx/container/nginx_entrypoint.bash @@ -7,6 +7,8 @@ mongo_backend_port=`printenv MONGODB_BACKEND_PORT` bdb_frontend_port=`printenv BIGCHAINDB_FRONTEND_PORT` bdb_backend_host=`printenv BIGCHAINDB_BACKEND_HOST` bdb_backend_port=`printenv BIGCHAINDB_BACKEND_PORT` +bdb_ws_frontend_port=`printenv BIGCHAINDB_WS_FRONTEND_PORT` +bdb_ws_backend_port=`printenv BIGCHAINDB_WS_BACKEND_PORT` mongo_whitelist=`printenv MONGODB_WHITELIST` dns_server=`printenv DNS_SERVER` @@ -17,6 +19,8 @@ if [[ -z "${mongo_frontend_port}" || \ -z "${bdb_frontend_port}" || \ -z "${bdb_backend_host}" || \ -z "${bdb_backend_port}" || \ + -z "${bdb_ws_backend_port}" || \ + -z "${bdb_ws_frontend_port}" || \ -z "${dns_server}" ]] ; then echo "Invalid environment settings detected. Exiting!" exit 1 @@ -31,6 +35,8 @@ sed -i "s|MONGODB_BACKEND_PORT|${mongo_backend_port}|g" $NGINX_CONF_FILE sed -i "s|BIGCHAINDB_FRONTEND_PORT|${bdb_frontend_port}|g" $NGINX_CONF_FILE sed -i "s|BIGCHAINDB_BACKEND_HOST|${bdb_backend_host}|g" $NGINX_CONF_FILE sed -i "s|BIGCHAINDB_BACKEND_PORT|${bdb_backend_port}|g" $NGINX_CONF_FILE +sed -i "s|BIGCHAINDB_WS_FRONTEND_PORT|${bdb_ws_frontend_port}|g" $NGINX_CONF_FILE +sed -i "s|BIGCHAINDB_WS_BACKEND_PORT|${bdb_ws_backend_port}|g" $NGINX_CONF_FILE sed -i "s|DNS_SERVER|${dns_server}|g" $NGINX_CONF_FILE # populate the whitelist in the conf file as per MONGODB_WHITELIST env var From 8297ce92600c8955656df516078666bcef5b5f82 Mon Sep 17 00:00:00 2001 From: Troy McConaghy Date: Thu, 4 May 2017 15:04:54 +0200 Subject: [PATCH 17/60] Minor edits & fixes to WebSocket API docs --- .../source/websocket-event-stream-api.rst | 35 +++++++++++-------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/docs/server/source/websocket-event-stream-api.rst b/docs/server/source/websocket-event-stream-api.rst index 3ce86553..0310ef63 100644 --- a/docs/server/source/websocket-event-stream-api.rst +++ b/docs/server/source/websocket-event-stream-api.rst @@ -8,9 +8,8 @@ The WebSocket Event Stream API BigchainDB provides real-time event streams over the WebSocket protocol with the Event Stream API. - Connecting to an event stream from your application enables a BigchainDB node -to notify you as events are processed, such as new `validated transactions <#valid-transactions>`_. +to notify you as events occur, such as new `validated transactions <#valid-transactions>`_. Demoing the API @@ -23,25 +22,31 @@ to familiarize yourself before attempting an integration. Determining Support for the Event Stream API -------------------------------------------- -In practice, it's a good idea to make sure that the node you're connecting with +It's a good idea to make sure that the node you're connecting with has advertised support for the Event Stream API. To do so, send a HTTP GET -request to the node's :ref:`Root URL ` and check that the -response contains a ``streams_`` property in ``_links``:: +request to the node's :ref:`API Root Endpoint` +(e.g. ``http://localhost:9984/api/v1/``) and check that the +response contains a ``streams_`` property in ``_links``: + +.. code:: JSON { "_links": { - "streams_v1": "ws://example.com:9985/api/v1/streams/" + ..., + "streams_v1": "ws://example.com:9985/api/v1/streams/valid_tx", + ... } } -Connection Keep Alive -~~~~~~~~~~~~~~~~~~~~~ +Connection Keep-Alive +--------------------- The Event Stream API initially does not provide any mechanisms for connection -keep alive other than enabling TCP keepalive on each open WebSocket connection. +keep-alive other than enabling TCP keepalive on each open WebSocket connection. In the future, we may add additional functionality to handle ping/pong frames -or payloads designed for keep alive. +or payloads designed for keep-alive. + Streams ------- @@ -54,8 +59,8 @@ Streams will always be under the WebSocket protocol (so ``ws://`` or ``wss://``) and accessible as extensions to the ``/api/v/streams/`` API root URL (for example, `validated transactions <#valid-transactions>`_ would be accessible under ``/api/v1/streams/valid_tx``). If you're running your -own BigchainDB instance and need help determining its root URL, you can find -more :ref:`here `. +own BigchainDB instance and need help determining its root URL, +then see the page titled :ref:`Determining the API Root URL`. All messages sent in a stream are in the JSON format. @@ -68,7 +73,7 @@ All messages sent in a stream are in the JSON format. as a specific ``output``'s ``public_key``. If you have specific use cases that you think would fit as part of this - API, feel free to reach out via `gitter `_ + API, feel free to reach out via `Gitter `_ or `email `_. Valid Transactions @@ -79,7 +84,9 @@ Valid Transactions Streams an event for any newly validated transactions. Message bodies contain the transaction's ID, associated asset ID, and containing block's ID. -Example message:: +Example message: + +.. code:: JSON { "tx_id": "", From 31028ba1472e8e31b91f0cfa746d2a854752a7a2 Mon Sep 17 00:00:00 2001 From: Troy McConaghy Date: Fri, 5 May 2017 11:16:00 +0200 Subject: [PATCH 18/60] New docs how to 'Create an Email Alert' in OMS Portal --- .../log-analytics.rst | 69 ++++++++++++++----- 1 file changed, 51 insertions(+), 18 deletions(-) diff --git a/docs/server/source/cloud-deployment-templates/log-analytics.rst b/docs/server/source/cloud-deployment-templates/log-analytics.rst index fbef70d2..1f5d5596 100644 --- a/docs/server/source/cloud-deployment-templates/log-analytics.rst +++ b/docs/server/source/cloud-deployment-templates/log-analytics.rst @@ -3,11 +3,10 @@ Log Analytics on Azure This section documents how to create and configure a Log Analytics workspace on Azure, for a Kubernetes-based deployment. - The documented approach is based on an integration of Microsoft's Operations Management Suite (OMS) with a Kubernetes-based Azure Container Service cluster. -The :ref:`oms-k8s-references` contains links to more detailed documentation on +The :ref:`oms-k8s-references` section (below) contains links to more detailed documentation on Azure, and Kubernetes. There are three main steps involved: @@ -23,9 +22,9 @@ one template so we'll cover them together. Step 3 relies on a Minimum Requirements -------------------- This document assumes that you have already deployed a Kubernetes cluster, and -that you have the Kubernetes command line ``kubectl`` installed. +that you have the Kubernetes command line interface ``kubectl`` installed. -Creating a workspace and adding a containers solution +Creating a Workspace and Adding a Containers Solution ----------------------------------------------------- For the sake of this document and example, we'll assume an existing resource group named: @@ -46,7 +45,7 @@ If you feel creative you may replace these names by more interesting ones. --template-file log_analytics_oms.json \ --parameters @log_analytics_oms.parameters.json -An example of a simple tenplate file (``--template-file``): +An example of a simple template file (``--template-file``): .. code-block:: json @@ -120,14 +119,14 @@ An example of the associated parameter file (``--parameters``): } } -Deploying the OMS agent(s) --------------------------- -In order to deploy an OMS agent two important pieces of information are needed: +Deploy the OMS Agents +--------------------- +To deploy an OMS agent, two important pieces of information are needed: * workspace id * workspace key -Obtaining the workspace id: +You can obtain the workspace id using: .. code-block:: bash @@ -138,13 +137,17 @@ Obtaining the workspace id: | grep customerId "customerId": "12345678-1234-1234-1234-123456789012", -Obtaining the workspace key: +Until we figure out a way to obtain the *workspace key* via the command line, +you can get it via the OMS Portal. +To get to the OMS Portal, go to the Azure Portal and click on: -Until we figure out a way to this via the command line please see instructions -under `Obtain your workspace ID and key -`_. +Resource Groups > (Your k8s cluster's resource group) > Log analytics (OMS) > (Name of the only item listed) > OMS Workspace > OMS Portal -Once you have the workspace id and key you can include them in the following +(Let us know if you find a faster way.) +Then see `Microsoft's instructions to obtain your workspace ID and key +`_ (via the OMS Portal). + +Once you have the workspace id and key, you can include them in the following YAML file (:download:`oms-daemonset.yaml <../../../../k8s/logging-and-monitoring/oms-daemonset.yaml>`): @@ -182,14 +185,44 @@ YAML file (:download:`oms-daemonset.yaml hostPath: path: /var/run/docker.sock -To deploy the agent simply run the following command: +To deploy the OMS agents (one per Kubernetes node, i.e. one per computer), +simply run the following command: .. code-block:: bash $ kubectl create -f oms-daemonset.yaml -Some useful management tasks +Create an Email Alert +--------------------- + +Suppose you want to get an email whenever there's a logging message +with the CRITICAL or ERROR logging level from any container. +At the time of writing, it wasn't possible to create email alerts +using the Azure Portal (as far as we could tell), +but it *was* possible using the OMS Portal. +(There are instructions to get to the OMS Portal +in the section titled :ref:`Deploy the OMS Agents` above.) +Once you're in the OMS Portal, click on **Log Search** +and enter the query string: + +``Type=ContainerLog (critical OR error)`` + +If you don't see any query results, +try experimenting with the query string and time range +to convince yourself that it's working. +For query syntax help, see the +`Log Analytics search reference `_. +If you want to exclude the "404 Not Found" errors, +use the query string +"Type=ContainerLog (critical OR error) NOT(404)". +Once you're satisfied with the query string, +click the **🔔 Alert** icon in the top menu, +fill in the form, +and click **Save** when you're done. + + +Some Useful Management Tasks ---------------------------- List workspaces: @@ -207,7 +240,7 @@ List solutions: --resource-group resource_group \ --resource-type Microsoft.OperationsManagement/solutions -Deleting the containers solution: +Delete the containers solution: .. code-block:: bash @@ -222,7 +255,7 @@ Deleting the containers solution: --resource-type Microsoft.OperationsManagement/solutions \ --name "Containers(work_space)" -Deleting the workspace: +Delete the workspace: .. code-block:: bash From b4f14b26ce264e7c24053b6e9be4cbea21ade054 Mon Sep 17 00:00:00 2001 From: Scott Sadler Date: Mon, 8 May 2017 14:25:39 +0200 Subject: [PATCH 19/60] address nomenclature issues --- bigchaindb/core.py | 2 +- bigchaindb/fastquery.py | 39 ++++++++++++++++++++--------------- tests/db/test_bigchain_api.py | 4 ++-- tests/test_fastquery.py | 20 +++++++++--------- 4 files changed, 35 insertions(+), 30 deletions(-) diff --git a/bigchaindb/core.py b/bigchaindb/core.py index 63cab1d2..91666224 100644 --- a/bigchaindb/core.py +++ b/bigchaindb/core.py @@ -395,7 +395,7 @@ class Bigchain(object): """ Get a list of output links filtered on some criteria """ - outputs = self.fastquery.get_outputs_by_pubkey(owner) + outputs = self.fastquery.get_outputs_by_public_key(owner) if not include_spent: outputs = self.fastquery.filter_spent_outputs(outputs) return outputs diff --git a/bigchaindb/fastquery.py b/bigchaindb/fastquery.py index deffabe1..d19294ce 100644 --- a/bigchaindb/fastquery.py +++ b/bigchaindb/fastquery.py @@ -22,32 +22,37 @@ class FastQuery: self.connection = connection self.me = me - def filter_block_ids(self, block_ids, include_undecided=True): + def filter_valid_block_ids(self, block_ids, include_undecided=False): """ - Given block ids, filter the invalid blocks. + Given block ids, return only the ones that are valid. """ block_ids = list(set(block_ids)) votes = query.get_votes_for_blocks_by_voter( self.connection, block_ids, self.me) - votes = {v['vote']['voting_for_block']: v['vote']['is_block_valid'] for v in votes} - return [b for b in block_ids if votes.get(b, include_undecided)] + votes = {vote['vote']['voting_for_block']: vote['vote']['is_block_valid'] + for vote in votes} + return [block_id for block_id in block_ids + if votes.get(block_id, include_undecided)] - def filter_valid_blocks(self, blocks, key=lambda b: b[0]): + def filter_valid_items(self, items, block_id_key=lambda b: b[0]): """ - Given things with block ids, remove the invalid ones. + Given items with block ids, return only the ones that are valid or undecided. """ - blocks = list(blocks) - valid_block_ids = set(self.filter_block_ids(key(b) for b in blocks)) - return [b for b in blocks if key(b) in valid_block_ids] + items = list(items) + block_ids = map(block_id_key, items) + valid_block_ids = set(self.filter_valid_block_ids(block_ids, True)) + return [b for b in items if block_id_key(b) in valid_block_ids] - def get_outputs_by_pubkey(self, pubkey): - """ Get outputs for a public key """ - res = list(query.get_owned_ids(self.connection, pubkey)) - txs = [tx for _, tx in self.filter_valid_blocks(res)] - return [TransactionLink(tx['id'], i) + def get_outputs_by_public_key(self, public_key): + """ + Get outputs for a public key + """ + res = list(query.get_owned_ids(self.connection, public_key)) + txs = [tx for _, tx in self.filter_valid_items(res)] + return [TransactionLink(tx['id'], index) for tx in txs - for i, o in enumerate(tx['outputs']) - if output_has_owner(o, pubkey)] + for index, output in enumerate(tx['outputs']) + if output_has_owner(output, public_key)] def filter_spent_outputs(self, outputs): """ @@ -58,7 +63,7 @@ class FastQuery: """ links = [o.to_dict() for o in outputs] res = query.get_spending_transactions(self.connection, links) - txs = [tx for _, tx in self.filter_valid_blocks(res)] + txs = [tx for _, tx in self.filter_valid_items(res)] spends = {TransactionLink.from_dict(input_['fulfills']) for tx in txs for input_ in tx['inputs']} diff --git a/tests/db/test_bigchain_api.py b/tests/db/test_bigchain_api.py index 589671e5..8d17a9e7 100644 --- a/tests/db/test_bigchain_api.py +++ b/tests/db/test_bigchain_api.py @@ -1119,7 +1119,7 @@ def test_get_owned_ids_calls_get_outputs_filtered(): def test_get_outputs_filtered_only_unspent(): from bigchaindb.common.transaction import TransactionLink from bigchaindb.core import Bigchain - with patch('bigchaindb.fastquery.FastQuery.get_outputs_by_pubkey') as get_outputs: + with patch('bigchaindb.fastquery.FastQuery.get_outputs_by_public_key') as get_outputs: get_outputs.return_value = [TransactionLink('a', 1), TransactionLink('b', 2)] with patch('bigchaindb.fastquery.FastQuery.filter_spent_outputs') as filter_spent: @@ -1132,7 +1132,7 @@ def test_get_outputs_filtered_only_unspent(): def test_get_outputs_filtered(): from bigchaindb.common.transaction import TransactionLink from bigchaindb.core import Bigchain - with patch('bigchaindb.fastquery.FastQuery.get_outputs_by_pubkey') as get_outputs: + with patch('bigchaindb.fastquery.FastQuery.get_outputs_by_public_key') as get_outputs: get_outputs.return_value = [TransactionLink('a', 1), TransactionLink('b', 2)] with patch('bigchaindb.fastquery.FastQuery.filter_spent_outputs') as filter_spent: diff --git a/tests/test_fastquery.py b/tests/test_fastquery.py index c54ef10e..8fb3378c 100644 --- a/tests/test_fastquery.py +++ b/tests/test_fastquery.py @@ -21,30 +21,30 @@ def blockdata(b, user_pk, user2_pk): return blocks, [b['id'] for b in blocks] -def test_filter_block_ids_with_undecided(b, blockdata): +def test_filter_valid_block_ids_with_undecided(b, blockdata): blocks, block_ids = blockdata - valid_ids = b.fastquery.filter_block_ids(block_ids) + valid_ids = b.fastquery.filter_valid_block_ids(block_ids, include_undecided=True) assert set(valid_ids) == {blocks[0]['id'], blocks[1]['id']} -def test_filter_block_ids_only_valid(b, blockdata): +def test_filter_valid_block_ids_only_valid(b, blockdata): blocks, block_ids = blockdata - valid_ids = b.fastquery.filter_block_ids(block_ids, include_undecided=False) + valid_ids = b.fastquery.filter_valid_block_ids(block_ids) assert set(valid_ids) == {blocks[1]['id']} -def test_filter_valid_blocks(b, blockdata): +def test_filter_valid_items(b, blockdata): blocks, _ = blockdata - assert (b.fastquery.filter_valid_blocks(blocks, key=lambda b: b['id']) + assert (b.fastquery.filter_valid_items(blocks, block_id_key=lambda b: b['id']) == [blocks[0], blocks[1]]) -def test_get_outputs_by_pubkey(b, user_pk, user2_pk, blockdata): +def test_get_outputs_by_public_key(b, user_pk, user2_pk, blockdata): blocks, _ = blockdata - assert b.fastquery.get_outputs_by_pubkey(user_pk) == [ + assert b.fastquery.get_outputs_by_public_key(user_pk) == [ TransactionLink(blocks[1]['block']['transactions'][0]['id'], 0) ] - assert b.fastquery.get_outputs_by_pubkey(user2_pk) == [ + assert b.fastquery.get_outputs_by_public_key(user2_pk) == [ TransactionLink(blocks[0]['block']['transactions'][0]['id'], 0) ] @@ -76,7 +76,7 @@ def test_filter_spent_outputs(b, user_pk): block = Block([tx4]) b.write_block(block) - outputs = b.fastquery.get_outputs_by_pubkey(user_pk) + outputs = b.fastquery.get_outputs_by_public_key(user_pk) unspents = b.fastquery.filter_spent_outputs(outputs) assert set(unspents) == { From 92392b51a7a6e1eda3263d17d5ca3ffc2bf46f2c Mon Sep 17 00:00:00 2001 From: Rodolphe Marques Date: Wed, 10 May 2017 16:43:52 +0200 Subject: [PATCH 20/60] Initial implementation to decouple assets from transactions. Most changes done to how we write and read blocks to the database. Created schema, indexes and queries for mongodb. Fixed tests. --- bigchaindb/backend/mongodb/query.py | 38 +++++++++++++++--- bigchaindb/backend/mongodb/schema.py | 13 ++++++- bigchaindb/backend/query.py | 14 ++++++- bigchaindb/backend/rethinkdb/query.py | 2 +- bigchaindb/core.py | 48 ++++++++++++++++++----- bigchaindb/models.py | 56 +++++++++++++++++++++++++++ bigchaindb/pipelines/vote.py | 2 +- tests/backend/mongodb/test_queries.py | 13 ++++--- tests/backend/mongodb/test_schema.py | 9 ++++- tests/backend/test_generics.py | 4 +- tests/db/test_bigchain_api.py | 2 +- tests/pipelines/test_vote.py | 18 +++++++-- 12 files changed, 187 insertions(+), 32 deletions(-) diff --git a/bigchaindb/backend/mongodb/query.py b/bigchaindb/backend/mongodb/query.py index 74b9c35a..39d99d4a 100644 --- a/bigchaindb/backend/mongodb/query.py +++ b/bigchaindb/backend/mongodb/query.py @@ -7,7 +7,7 @@ from pymongo import ReturnDocument from bigchaindb import backend from bigchaindb.common.exceptions import CyclicBlockchainError from bigchaindb.common.transaction import Transaction -from bigchaindb.backend.exceptions import DuplicateKeyError +from bigchaindb.backend.exceptions import DuplicateKeyError, OperationError from bigchaindb.backend.utils import module_dispatch_registrar from bigchaindb.backend.mongodb.connection import MongoDBConnection @@ -127,6 +127,7 @@ def get_txids_filtered(conn, asset_id, operation=None): return (elem['block']['transactions']['id'] for elem in cursor) +# TODO: This doesn't seem to be used anywhere @register_query(MongoDBConnection) def get_asset_by_id(conn, asset_id): cursor = conn.run( @@ -206,10 +207,10 @@ def get_votes_by_block_id_and_voter(conn, block_id, node_pubkey): @register_query(MongoDBConnection) -def write_block(conn, block): +def write_block(conn, block_dict): return conn.run( conn.collection('bigchain') - .insert_one(block.to_dict())) + .insert_one(block_dict)) @register_query(MongoDBConnection) @@ -220,6 +221,31 @@ def get_block(conn, block_id): projection={'_id': False})) +@register_query(MongoDBConnection) +def write_assets(conn, assets): + try: + # unordered means that all the inserts will be attempted instead of + # stopping after the first error. + return conn.run( + conn.collection('assets') + .insert_many(assets, ordered=False)) + # This can happen if we try to write the same asset multiple times. + # One case is when we write the same transaction into multiple blocks due + # to invalid blocks. + # The actual mongodb exception is a BulkWriteError due to a duplicated key + # in one of the inserts. + except OperationError: + return + + +@register_query(MongoDBConnection) +def get_assets(conn, asset_ids): + return conn.run( + conn.collection('assets') + .find({'id': {'$in': asset_ids}}, + projection={'_id': False})) + + @register_query(MongoDBConnection) def count_blocks(conn): return conn.run( @@ -252,7 +278,7 @@ def get_genesis_block(conn): @register_query(MongoDBConnection) -def get_last_voted_block(conn, node_pubkey): +def get_last_voted_block_id(conn, node_pubkey): last_voted = conn.run( conn.collection('votes') .find({'node_pubkey': node_pubkey}, @@ -261,7 +287,7 @@ def get_last_voted_block(conn, node_pubkey): # pymongo seems to return a cursor even if there are no results # so we actually need to check the count if last_voted.count() == 0: - return get_genesis_block(conn) + return get_genesis_block(conn)['id'] mapping = {v['vote']['previous_block']: v['vote']['voting_for_block'] for v in last_voted} @@ -279,7 +305,7 @@ def get_last_voted_block(conn, node_pubkey): except KeyError: break - return get_block(conn, last_block_id) + return last_block_id @register_query(MongoDBConnection) diff --git a/bigchaindb/backend/mongodb/schema.py b/bigchaindb/backend/mongodb/schema.py index ad89f9bc..527476f0 100644 --- a/bigchaindb/backend/mongodb/schema.py +++ b/bigchaindb/backend/mongodb/schema.py @@ -27,7 +27,7 @@ def create_database(conn, dbname): @register_schema(MongoDBConnection) def create_tables(conn, dbname): - for table_name in ['bigchain', 'backlog', 'votes']: + for table_name in ['bigchain', 'backlog', 'votes', 'assets']: logger.info('Create `%s` table.', table_name) # create the table # TODO: read and write concerns can be declared here @@ -39,6 +39,7 @@ def create_indexes(conn, dbname): create_bigchain_secondary_index(conn, dbname) create_backlog_secondary_index(conn, dbname) create_votes_secondary_index(conn, dbname) + create_assets_secondary_index(conn, dbname) @register_schema(MongoDBConnection) @@ -102,3 +103,13 @@ def create_votes_secondary_index(conn, dbname): ASCENDING)], name='block_and_voter', unique=True) + + +def create_assets_secondary_index(conn, dbname): + logger.info('Create `assets` secondary index.') + + # is the first index redundant then? + # compound index to order votes by block id and node + conn.conn[dbname]['assets'].create_index('id', + name='asset_id', + unique=True) diff --git a/bigchaindb/backend/query.py b/bigchaindb/backend/query.py index 9aa653d7..8f1325ef 100644 --- a/bigchaindb/backend/query.py +++ b/bigchaindb/backend/query.py @@ -211,6 +211,18 @@ def get_block(connection, block_id): raise NotImplementedError +@singledispatch +def write_assets(connection, assets): + # TODO: write docstring + raise NotImplementedError + + +@singledispatch +def get_assets(connection, assets): + # TODO: write docstring + raise NotImplementedError + + @singledispatch def count_blocks(connection): """Count the number of blocks in the bigchain table. @@ -259,7 +271,7 @@ def get_genesis_block(connection): @singledispatch -def get_last_voted_block(connection, node_pubkey): +def get_last_voted_block_id(connection, node_pubkey): """Get the last voted block for a specific node. Args: diff --git a/bigchaindb/backend/rethinkdb/query.py b/bigchaindb/backend/rethinkdb/query.py index 6011cc8c..be20442a 100644 --- a/bigchaindb/backend/rethinkdb/query.py +++ b/bigchaindb/backend/rethinkdb/query.py @@ -188,7 +188,7 @@ def get_genesis_block(connection): @register_query(RethinkDBConnection) -def get_last_voted_block(connection, node_pubkey): +def get_last_voted_block_id(connection, node_pubkey): try: # get the latest value for the vote timestamp (over all votes) max_timestamp = connection.run( diff --git a/bigchaindb/core.py b/bigchaindb/core.py index 5d2e9c03..c6ff5608 100644 --- a/bigchaindb/core.py +++ b/bigchaindb/core.py @@ -183,15 +183,23 @@ class Bigchain(object): include_status (bool): also return the status of the block the return value is then a tuple: (block, status) """ - block = backend.query.get_block(self.connection, block_id) - status = None + # get block from database + block_dict = backend.query.get_block(self.connection, block_id) + # get the asset ids from the block + if block_dict: + asset_ids = Block.get_asset_ids(block_dict) + # get the assets from the database + assets = self.get_assets(asset_ids) + # add the assets to the block transactions + block_dict = Block.couple_assets(block_dict, assets) + status = None if include_status: - if block: - status = self.block_election_status(block) - return block, status + if block_dict: + status = self.block_election_status(block_dict) + return block_dict, status else: - return block + return block_dict def get_transaction(self, txid, include_status=False): """Get the transaction with the specified `txid` (and optionally its status) @@ -251,7 +259,13 @@ class Bigchain(object): tx_status = self.TX_IN_BACKLOG if response: - response = Transaction.from_dict(response) + if tx_status == self.TX_IN_BACKLOG: + response = Transaction.from_dict(response) + else: + # If we are reading from the bigchain collection the asset is + # not in the transaction so we need to fetch the asset and + # reconstruct the transaction. + response = Transaction.from_db(self, response) if include_status: return response, tx_status @@ -513,7 +527,14 @@ class Bigchain(object): block (Block): block to write to bigchain. """ - return backend.query.write_block(self.connection, block) + # Decouple assets from block + assets, block_dict = block.decouple_assets() + # write the assets + if assets: + self.write_assets(assets) + + # write the block + return backend.query.write_block(self.connection, block_dict) def prepare_genesis_block(self): """Prepare a genesis block.""" @@ -592,7 +613,9 @@ class Bigchain(object): def get_last_voted_block(self): """Returns the last block that this node voted on.""" - return Block.from_dict(backend.query.get_last_voted_block(self.connection, self.me)) + last_block_id = backend.query.get_last_voted_block_id(self.connection, + self.me) + return Block.from_dict(self.get_block(last_block_id)) def get_unvoted_blocks(self): """Return all the blocks that have not been voted on by this node. @@ -616,3 +639,10 @@ class Bigchain(object): """Tally the votes on a block, and return the status: valid, invalid, or undecided.""" return self.block_election(block)['status'] + + def get_assets(self, asset_ids): + # TODO: write docstrings + return backend.query.get_assets(self.connection, asset_ids) + + def write_assets(self, assets): + return backend.query.write_assets(self.connection, assets) diff --git a/bigchaindb/models.py b/bigchaindb/models.py index 2f46ba20..5de56aaf 100644 --- a/bigchaindb/models.py +++ b/bigchaindb/models.py @@ -1,3 +1,5 @@ +from copy import deepcopy + from bigchaindb.common.crypto import hash_data, PublicKey, PrivateKey from bigchaindb.common.exceptions import (InvalidHash, InvalidSignature, DoubleSpend, InputDoesNotExist, @@ -84,6 +86,16 @@ class Transaction(Transaction): validate_transaction_schema(tx_body) return super().from_dict(tx_body) + @classmethod + def from_db(cls, bigchain, tx_dict): + # TODO: write docstring + if tx_dict['operation'] in [Transaction.CREATE, Transaction.CREATE]: + asset = bigchain.get_assets([tx_dict['id']])[0] + asset.pop('id') + tx_dict.update({'asset': asset}) + + return cls.from_dict(tx_dict) + class Block(object): """Bundle a list of Transactions in a Block. Nodes vote on its validity. @@ -300,5 +312,49 @@ class Block(object): 'signature': self.signature, } + @classmethod + def from_db(cls, bigchain, block_dict): + asset_ids = cls.get_asset_ids(block_dict) + assets = bigchain.get_assets(asset_ids) + block_dict = cls.couple_assets(block_dict, assets) + return cls.from_dict(block_dict) + + def decouple_assets(self): + # TODO: Write documentation + block_dict = deepcopy(self.to_dict()) + assets = [] + for transaction in block_dict['block']['transactions']: + if transaction['operation'] in [Transaction.CREATE, + Transaction.GENESIS]: + asset = transaction.pop('asset') + asset.update({'id': transaction['id']}) + assets.append(asset) + + return (assets, block_dict) + + @staticmethod + def couple_assets(block_dict, assets): + # TODO: Write docstring + # create a dict with {'': asset} + assets = {asset.pop('id'): asset for asset in assets} + # add the assets to the block transactions + for transaction in block_dict['block']['transactions']: + if transaction['operation'] in [Transaction.CREATE, + Transaction.GENESIS]: + transaction.update({'asset': assets.get(transaction['id'], + None)}) + return block_dict + + @staticmethod + def get_asset_ids(block_dict): + # TODO: Write docstring + asset_ids = [] + for transaction in block_dict['block']['transactions']: + if transaction['operation'] in [Transaction.CREATE, + Transaction.GENESIS]: + asset_ids.append(transaction['id']) + + return asset_ids + def to_str(self): return serialize(self.to_dict()) diff --git a/bigchaindb/pipelines/vote.py b/bigchaindb/pipelines/vote.py index 9664c520..e90ce6c4 100644 --- a/bigchaindb/pipelines/vote.py +++ b/bigchaindb/pipelines/vote.py @@ -50,7 +50,7 @@ class Vote: def validate_block(self, block): if not self.bigchain.has_previous_vote(block['id']): try: - block = Block.from_dict(block) + block = Block.from_db(self.bigchain, block) except (exceptions.InvalidHash): # XXX: if a block is invalid we should skip the `validate_tx` # step, but since we are in a pipeline we cannot just jump to diff --git a/tests/backend/mongodb/test_queries.py b/tests/backend/mongodb/test_queries.py index bd7e75f1..1363d9d7 100644 --- a/tests/backend/mongodb/test_queries.py +++ b/tests/backend/mongodb/test_queries.py @@ -269,7 +269,7 @@ def test_write_block(signed_create_tx): # create and write block block = Block(transactions=[signed_create_tx]) - query.write_block(conn, block) + query.write_block(conn, block.to_dict()) block_db = conn.db.bigchain.find_one({'id': block.id}, {'_id': False}) @@ -347,17 +347,18 @@ def test_get_genesis_block(genesis_block): from bigchaindb.backend import connect, query conn = connect() - assert query.get_genesis_block(conn) == genesis_block.to_dict() + assets, genesis_block_dict = genesis_block.decouple_assets() + assert query.get_genesis_block(conn) == genesis_block_dict -def test_get_last_voted_block(genesis_block, signed_create_tx, b): +def test_get_last_voted_block_id(genesis_block, signed_create_tx, b): from bigchaindb.backend import connect, query from bigchaindb.models import Block from bigchaindb.common.exceptions import CyclicBlockchainError conn = connect() # check that the last voted block is the genesis block - assert query.get_last_voted_block(conn, b.me) == genesis_block.to_dict() + assert query.get_last_voted_block_id(conn, b.me) == genesis_block.id # create and insert a new vote and block block = Block(transactions=[signed_create_tx]) @@ -365,7 +366,7 @@ def test_get_last_voted_block(genesis_block, signed_create_tx, b): vote = b.vote(block.id, genesis_block.id, True) conn.db.votes.insert_one(vote) - assert query.get_last_voted_block(conn, b.me) == block.to_dict() + assert query.get_last_voted_block_id(conn, b.me) == block.id # force a bad chain vote.pop('_id') @@ -374,7 +375,7 @@ def test_get_last_voted_block(genesis_block, signed_create_tx, b): conn.db.votes.insert_one(vote) with pytest.raises(CyclicBlockchainError): - query.get_last_voted_block(conn, b.me) + query.get_last_voted_block_id(conn, b.me) def test_get_unvoted_blocks(signed_create_tx): diff --git a/tests/backend/mongodb/test_schema.py b/tests/backend/mongodb/test_schema.py index 71eac7ff..e3b320bd 100644 --- a/tests/backend/mongodb/test_schema.py +++ b/tests/backend/mongodb/test_schema.py @@ -18,7 +18,8 @@ def test_init_creates_db_tables_and_indexes(): init_database() collection_names = conn.conn[dbname].collection_names() - assert sorted(collection_names) == ['backlog', 'bigchain', 'votes'] + assert sorted(collection_names) == ['assets', 'backlog', 'bigchain', + 'votes'] indexes = conn.conn[dbname]['bigchain'].index_information().keys() assert sorted(indexes) == ['_id_', 'asset_id', 'block_timestamp', 'inputs', @@ -31,6 +32,9 @@ def test_init_creates_db_tables_and_indexes(): indexes = conn.conn[dbname]['votes'].index_information().keys() assert sorted(indexes) == ['_id_', 'block_and_voter'] + indexes = conn.conn[dbname]['assets'].index_information().keys() + assert sorted(indexes) == ['_id_', 'asset_id'] + def test_init_database_fails_if_db_exists(): import bigchaindb @@ -62,7 +66,8 @@ def test_create_tables(): schema.create_tables(conn, dbname) collection_names = conn.conn[dbname].collection_names() - assert sorted(collection_names) == ['backlog', 'bigchain', 'votes'] + assert sorted(collection_names) == ['assets', 'backlog', 'bigchain', + 'votes'] def test_create_secondary_indexes(): diff --git a/tests/backend/test_generics.py b/tests/backend/test_generics.py index 57a644ee..6a1e9447 100644 --- a/tests/backend/test_generics.py +++ b/tests/backend/test_generics.py @@ -30,12 +30,14 @@ def test_schema(schema_func_name, args_qty): ('write_block', 1), ('get_block', 1), ('write_vote', 1), - ('get_last_voted_block', 1), + ('get_last_voted_block_id', 1), ('get_unvoted_blocks', 1), ('get_spent', 2), ('get_votes_by_block_id_and_voter', 2), ('update_transaction', 2), ('get_transaction_from_block', 2), + ('write_assets', 1), + ('get_assets', 1), )) def test_query(query_func_name, args_qty): from bigchaindb.backend import query diff --git a/tests/db/test_bigchain_api.py b/tests/db/test_bigchain_api.py index 3f05385c..5960f171 100644 --- a/tests/db/test_bigchain_api.py +++ b/tests/db/test_bigchain_api.py @@ -383,7 +383,7 @@ class TestBigchainApi(object): from bigchaindb.backend import query genesis = query.get_genesis_block(b.connection) - genesis = Block.from_dict(genesis) + genesis = Block.from_db(b, genesis) gb = b.get_last_voted_block() assert gb == genesis assert b.validate_block(gb) == gb diff --git a/tests/pipelines/test_vote.py b/tests/pipelines/test_vote.py index 7df7ca11..29523035 100644 --- a/tests/pipelines/test_vote.py +++ b/tests/pipelines/test_vote.py @@ -20,6 +20,15 @@ def dummy_block(b): return block +def decouple_assets(b, block): + # the block comming from the database does not contain the assets + # so we need to pass the block without the assets and store the assets + # so that the voting pipeline can reconstruct it + assets, block_dict = block.decouple_assets() + b.write_assets(assets) + return block_dict + + DUMMY_SHA3 = '0123456789abcdef' * 4 @@ -79,9 +88,10 @@ def test_vote_validate_block(b): tx = dummy_tx(b) block = b.create_block([tx]) + block_dict = decouple_assets(b, block) vote_obj = vote.Vote() - validation = vote_obj.validate_block(block.to_dict()) + validation = vote_obj.validate_block(block_dict) assert validation[0] == block.id for tx1, tx2 in zip(validation[1], block.transactions): assert tx1 == tx2 @@ -220,8 +230,9 @@ def test_valid_block_voting_multiprocessing(b, genesis_block, monkeypatch): vote_pipeline.setup(indata=inpipe, outdata=outpipe) block = dummy_block(b) + block_dict = decouple_assets(b, block) - inpipe.put(block.to_dict()) + inpipe.put(block_dict) vote_pipeline.start() vote_out = outpipe.get() vote_pipeline.terminate() @@ -257,6 +268,7 @@ def test_valid_block_voting_with_create_transaction(b, monkeypatch.setattr('time.time', lambda: 1111111111) block = b.create_block([tx]) + block_dict = decouple_assets(b, block) inpipe = Pipe() outpipe = Pipe() @@ -264,7 +276,7 @@ def test_valid_block_voting_with_create_transaction(b, vote_pipeline = vote.create_pipeline() vote_pipeline.setup(indata=inpipe, outdata=outpipe) - inpipe.put(block.to_dict()) + inpipe.put(block_dict) vote_pipeline.start() vote_out = outpipe.get() vote_pipeline.terminate() From 8c0dbeb28199ffab64619b281f04fb94e39fd05e Mon Sep 17 00:00:00 2001 From: Rodolphe Marques Date: Wed, 10 May 2017 17:55:43 +0200 Subject: [PATCH 21/60] Added asset decoupling support for rethinkdb Updated schema. Created queries for assets table. Fixed tests. --- bigchaindb/backend/rethinkdb/query.py | 25 +++++++++++++++++++------ bigchaindb/backend/rethinkdb/schema.py | 2 +- bigchaindb/models.py | 3 ++- tests/backend/rethinkdb/test_schema.py | 3 ++- 4 files changed, 24 insertions(+), 9 deletions(-) diff --git a/bigchaindb/backend/rethinkdb/query.py b/bigchaindb/backend/rethinkdb/query.py index be20442a..417bcd93 100644 --- a/bigchaindb/backend/rethinkdb/query.py +++ b/bigchaindb/backend/rethinkdb/query.py @@ -6,6 +6,7 @@ import rethinkdb as r from bigchaindb import backend, utils from bigchaindb.common import exceptions from bigchaindb.common.transaction import Transaction +from bigchaindb.common.utils import serialize from bigchaindb.backend.utils import module_dispatch_registrar from bigchaindb.backend.rethinkdb.connection import RethinkDBConnection @@ -147,10 +148,10 @@ def get_votes_by_block_id_and_voter(connection, block_id, node_pubkey): @register_query(RethinkDBConnection) -def write_block(connection, block): +def write_block(connection, block_dict): return connection.run( r.table('bigchain') - .insert(r.json(block.to_str()), durability=WRITE_DURABILITY)) + .insert(r.json(serialize(block_dict)), durability=WRITE_DURABILITY)) @register_query(RethinkDBConnection) @@ -158,6 +159,20 @@ def get_block(connection, block_id): return connection.run(r.table('bigchain').get(block_id)) +@register_query(RethinkDBConnection) +def write_assets(connection, assets): + return connection.run( + r.table('assets') + .insert(assets, durability=WRITE_DURABILITY)) + + +@register_query(RethinkDBConnection) +def get_assets(connection, asset_ids): + return connection.run( + r.table('assets', read_mode=READ_MODE) + .get_all(*asset_ids)) + + @register_query(RethinkDBConnection) def count_blocks(connection): return connection.run( @@ -203,7 +218,7 @@ def get_last_voted_block_id(connection, node_pubkey): except r.ReqlNonExistenceError: # return last vote if last vote exists else return Genesis block - return get_genesis_block(connection) + return get_genesis_block(connection)['id'] # Now the fun starts. Since the resolution of timestamp is a second, # we might have more than one vote per timestamp. If this is the case @@ -235,9 +250,7 @@ def get_last_voted_block_id(connection, node_pubkey): except KeyError: break - return connection.run( - r.table('bigchain', read_mode=READ_MODE) - .get(last_block_id)) + return last_block_id @register_query(RethinkDBConnection) diff --git a/bigchaindb/backend/rethinkdb/schema.py b/bigchaindb/backend/rethinkdb/schema.py index 997ec5fc..8f0f6b9c 100644 --- a/bigchaindb/backend/rethinkdb/schema.py +++ b/bigchaindb/backend/rethinkdb/schema.py @@ -23,7 +23,7 @@ def create_database(connection, dbname): @register_schema(RethinkDBConnection) def create_tables(connection, dbname): - for table_name in ['bigchain', 'backlog', 'votes']: + for table_name in ['bigchain', 'backlog', 'votes', 'assets']: logger.info('Create `%s` table.', table_name) connection.run(r.db(dbname).table_create(table_name)) diff --git a/bigchaindb/models.py b/bigchaindb/models.py index 5de56aaf..81ca898c 100644 --- a/bigchaindb/models.py +++ b/bigchaindb/models.py @@ -90,7 +90,8 @@ class Transaction(Transaction): def from_db(cls, bigchain, tx_dict): # TODO: write docstring if tx_dict['operation'] in [Transaction.CREATE, Transaction.CREATE]: - asset = bigchain.get_assets([tx_dict['id']])[0] + # TODO: Maybe replace this call to a call to get_asset_by_id + asset = list(bigchain.get_assets([tx_dict['id']]))[0] asset.pop('id') tx_dict.update({'asset': asset}) diff --git a/tests/backend/rethinkdb/test_schema.py b/tests/backend/rethinkdb/test_schema.py index e19dfdc2..6f77b672 100644 --- a/tests/backend/rethinkdb/test_schema.py +++ b/tests/backend/rethinkdb/test_schema.py @@ -63,7 +63,8 @@ def test_create_tables(): assert conn.run(r.db(dbname).table_list().contains('bigchain')) is True assert conn.run(r.db(dbname).table_list().contains('backlog')) is True assert conn.run(r.db(dbname).table_list().contains('votes')) is True - assert len(conn.run(r.db(dbname).table_list())) == 3 + assert conn.run(r.db(dbname).table_list().contains('assets')) is True + assert len(conn.run(r.db(dbname).table_list())) == 4 @pytest.mark.bdb From aacba571f85f317882547c97b8c132281cfae2c0 Mon Sep 17 00:00:00 2001 From: Rodolphe Marques Date: Thu, 11 May 2017 11:38:07 +0200 Subject: [PATCH 22/60] Added docstrings and tests to new methods --- bigchaindb/backend/mongodb/schema.py | 4 +- bigchaindb/backend/query.py | 21 +++- bigchaindb/models.py | 51 +++++++++- tests/backend/mongodb/test_queries.py | 46 +++++++++ tests/test_block_model.py | 133 ++++++++++++++++++++++++++ 5 files changed, 247 insertions(+), 8 deletions(-) diff --git a/bigchaindb/backend/mongodb/schema.py b/bigchaindb/backend/mongodb/schema.py index 527476f0..12b873e0 100644 --- a/bigchaindb/backend/mongodb/schema.py +++ b/bigchaindb/backend/mongodb/schema.py @@ -108,8 +108,8 @@ def create_votes_secondary_index(conn, dbname): def create_assets_secondary_index(conn, dbname): logger.info('Create `assets` secondary index.') - # is the first index redundant then? - # compound index to order votes by block id and node + # unique index on the id of the asset. + # the id is the txid of the transaction that created the asset conn.conn[dbname]['assets'].create_index('id', name='asset_id', unique=True) diff --git a/bigchaindb/backend/query.py b/bigchaindb/backend/query.py index 8f1325ef..5c37647c 100644 --- a/bigchaindb/backend/query.py +++ b/bigchaindb/backend/query.py @@ -213,13 +213,28 @@ def get_block(connection, block_id): @singledispatch def write_assets(connection, assets): - # TODO: write docstring + """Write a list of assets to the assets table. + + Args: + assets (list): a list of assets to write. + + Returns: + The database response. + """ raise NotImplementedError @singledispatch -def get_assets(connection, assets): - # TODO: write docstring +def get_assets(connection, asset_ids): + """Get a list of assets from the assets table. + + Args: + asset_ids (list): a of list of ids for the assets to be retrieved from + the database. + + Returns: + assets (list): the list of returned assets. + """ raise NotImplementedError diff --git a/bigchaindb/models.py b/bigchaindb/models.py index 81ca898c..53266930 100644 --- a/bigchaindb/models.py +++ b/bigchaindb/models.py @@ -315,13 +315,33 @@ class Block(object): @classmethod def from_db(cls, bigchain, block_dict): + """ + Helper method that reconstructs a block_dict that was returned from + the database. If checks what asset_ids to retrieve, retrieves the + assets from the assets table and reconstructs the block. + + Args: + bigchain (:class:`~bigchaindb.Bigchain`): An instance of Bigchain + used to perform database queries. + + Returns: + :class:`~Block` + + """ asset_ids = cls.get_asset_ids(block_dict) assets = bigchain.get_assets(asset_ids) block_dict = cls.couple_assets(block_dict, assets) return cls.from_dict(block_dict) def decouple_assets(self): - # TODO: Write documentation + """ + Extracts the assets from the `CREATE` transactions in the block. + + Returns: + tuple: (assets, block) with the assets being a list of dicts and + the block being the dict of the block with no assets in the CREATE + transactions. + """ block_dict = deepcopy(self.to_dict()) assets = [] for transaction in block_dict['block']['transactions']: @@ -335,7 +355,20 @@ class Block(object): @staticmethod def couple_assets(block_dict, assets): - # TODO: Write docstring + """ + Give a block_dict with not assets (as returned from a database call) + and a list of assets, reconstruct the original block by puting the + assets back into the `CREATE` transactions in the block. + + Args: + block_dict (:obj:`dict`): The block dict as returned from a + database call. + assets (:obj:`list` of :obj:`dict`): A list of assets returned from + a database call. + + Returns: + dict: The dict of the reconstructed block. + """ # create a dict with {'': asset} assets = {asset.pop('id'): asset for asset in assets} # add the assets to the block transactions @@ -348,7 +381,19 @@ class Block(object): @staticmethod def get_asset_ids(block_dict): - # TODO: Write docstring + """ + Given a block_dict return all the asset_ids for that block (the txid + of CREATE transactions). Usefull to know which assets to retrieve + from the database to reconstruct the block. + + Args: + block_dict (:obj:`dict`): The block dict as returned from a + database call. + + Returns: + list: The list of asset_ids in the block. + + """ asset_ids = [] for transaction in block_dict['block']['transactions']: if transaction['operation'] in [Transaction.CREATE, diff --git a/tests/backend/mongodb/test_queries.py b/tests/backend/mongodb/test_queries.py index 1363d9d7..c43c5fa4 100644 --- a/tests/backend/mongodb/test_queries.py +++ b/tests/backend/mongodb/test_queries.py @@ -1,4 +1,7 @@ +from copy import deepcopy + import pytest +import pymongo pytestmark = pytest.mark.bdb @@ -418,3 +421,46 @@ def test_get_txids_filtered(signed_create_tx, signed_transfer_tx): # Test get by asset and TRANSFER txids = set(query.get_txids_filtered(conn, asset_id, Transaction.TRANSFER)) assert txids == {signed_transfer_tx.id} + + +def test_write_assets(): + from bigchaindb.backend import connect, query + conn = connect() + + assets = [ + {'id': 1, 'data': '1'}, + {'id': 2, 'data': '2'}, + {'id': 3, 'data': '3'}, + # Duplicated id. Should not be written to the database + {'id': 1, 'data': '1'}, + ] + + # write the assets + query.write_assets(conn, deepcopy(assets)) + + # check that 3 assets were written to the database + cursor = conn.db.assets.find({}, projection={'_id': False})\ + .sort('id', pymongo.ASCENDING) + + assert cursor.count() == 3 + assert list(cursor) == assets[:-1] + + +def test_get_assets(): + from bigchaindb.backend import connect, query + conn = connect() + + assets = [ + {'id': 1, 'data': '1'}, + {'id': 2, 'data': '2'}, + {'id': 3, 'data': '3'}, + ] + + # write the assets + conn.db.assets.insert_many(deepcopy(assets), ordered=False) + + # read only 2 assets + cursor = query.get_assets(conn, [1, 3]) + + assert cursor.count() == 2 + assert list(cursor.sort('id', pymongo.ASCENDING)) == assets[::2] diff --git a/tests/test_block_model.py b/tests/test_block_model.py index 6e559cb2..981a64d7 100644 --- a/tests/test_block_model.py +++ b/tests/test_block_model.py @@ -153,3 +153,136 @@ class TestBlockModel(object): block = b.create_block([tx, tx]) with raises(DuplicateTransaction): block._validate_block(b) + + def test_decouple_assets(self, b): + from bigchaindb.models import Block, Transaction + + assets = [ + {'msg': '1'}, + {'msg': '2'}, + {'msg': '3'}, + ] + + txs = [] + # create 3 assets + for asset in assets: + tx = Transaction.create([b.me], [([b.me], 1)], asset=asset) + txs.append(tx) + + # create a `TRANSFER` transaction. + # the asset in `TRANSFER` transactions is not extracted + tx = Transaction.transfer(txs[0].to_inputs(), [([b.me], 1)], + asset_id=txs[0].id) + txs.append(tx) + + # create the block + block = Block(txs) + # decouple assets + assets_from_block, block_dict = block.decouple_assets() + + assert len(assets_from_block) == 3 + for i in range(3): + assert assets_from_block[i]['data'] == assets[i] + assert assets_from_block[i]['id'] == txs[i].id + + # check the `TRANSFER` transaction was not changed + assert block.transactions[3].to_dict() == \ + block_dict['block']['transactions'][3] + + def test_couple_assets(self, b): + from bigchaindb.models import Block, Transaction + + assets = [ + {'msg': '1'}, + {'msg': '2'}, + {'msg': '3'}, + ] + + txs = [] + # create 3 assets + for asset in assets: + tx = Transaction.create([b.me], [([b.me], 1)], asset=asset) + txs.append(tx) + + # create a `TRANSFER` transaction. + # the asset in `TRANSFER` transactions is not extracted + tx = Transaction.transfer(txs[0].to_inputs(), [([b.me], 1)], + asset_id=txs[0].id) + txs.append(tx) + + # create the block + block = Block(txs) + # decouple assets + assets_from_block, block_dict = block.decouple_assets() + + # reconstruct the block + block_dict_reconstructed = Block.couple_assets(block_dict, + assets_from_block) + + # check that the reconstructed block is the as the original block + assert block == Block.from_dict(block_dict_reconstructed) + + def test_get_asset_ids(self, b): + from bigchaindb.models import Block, Transaction + + assets = [ + {'msg': '1'}, + {'msg': '2'}, + {'msg': '3'}, + ] + + txs = [] + # create 3 assets + for asset in assets: + tx = Transaction.create([b.me], [([b.me], 1)], asset=asset) + txs.append(tx) + + # create a `TRANSFER` transaction. + # the asset in `TRANSFER` transactions is not extracted + tx = Transaction.transfer(txs[0].to_inputs(), [([b.me], 1)], + asset_id=txs[0].id) + txs.append(tx) + + # create the block + block = Block(txs) + # decouple assets + assets_from_block, block_dict = block.decouple_assets() + + # get the asset_ids and check that they are the same as the `CREATE` + # transactions + asset_ids = Block.get_asset_ids(block_dict) + assert asset_ids == [tx.id for tx in txs[:-1]] + + def test_from_db(self, b): + from bigchaindb.models import Block, Transaction + + assets = [ + {'msg': '1'}, + {'msg': '2'}, + {'msg': '3'}, + ] + + txs = [] + # create 3 assets + for asset in assets: + tx = Transaction.create([b.me], [([b.me], 1)], asset=asset) + txs.append(tx) + + # create a `TRANSFER` transaction. + # the asset in `TRANSFER` transactions is not extracted + tx = Transaction.transfer(txs[0].to_inputs(), [([b.me], 1)], + asset_id=txs[0].id) + txs.append(tx) + + # create the block + block = Block(txs) + # decouple assets + assets_from_block, block_dict = block.decouple_assets() + + # write the assets and block separatedly + b.write_assets(assets_from_block) + b.write_block(block) + + # check the reconstructed block is the same as the original block + block_from_db = Block.from_db(b, block_dict) + assert block == block_from_db From eb94181e1bc0ed904b25a3a1c626c5f2d863993b Mon Sep 17 00:00:00 2001 From: Rodolphe Marques Date: Thu, 11 May 2017 12:00:01 +0200 Subject: [PATCH 23/60] Fixed tests. Updated fixtures to flush the assets table after each test. --- tests/test_block_model.py | 2 ++ tests/utils.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/tests/test_block_model.py b/tests/test_block_model.py index 981a64d7..f2b307de 100644 --- a/tests/test_block_model.py +++ b/tests/test_block_model.py @@ -1,3 +1,4 @@ +import pytest from pytest import raises @@ -253,6 +254,7 @@ class TestBlockModel(object): asset_ids = Block.get_asset_ids(block_dict) assert asset_ids == [tx.id for tx in txs[:-1]] + @pytest.mark.bdb def test_from_db(self, b): from bigchaindb.models import Block, Transaction diff --git a/tests/utils.py b/tests/utils.py index 53ffd86b..9af72a5d 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -32,6 +32,7 @@ def flush_rethink_db(connection, dbname): connection.run(r.db(dbname).table('bigchain').delete()) connection.run(r.db(dbname).table('backlog').delete()) connection.run(r.db(dbname).table('votes').delete()) + connection.run(r.db(dbname).table('assets').delete()) except r.ReqlOpFailedError: pass @@ -41,6 +42,7 @@ def flush_mongo_db(connection, dbname): connection.conn[dbname].bigchain.delete_many({}) connection.conn[dbname].backlog.delete_many({}) connection.conn[dbname].votes.delete_many({}) + connection.conn[dbname].assets.delete_many({}) @singledispatch From ce414e46f3da825143c7df3c72295a68f61f073d Mon Sep 17 00:00:00 2001 From: Rodolphe Marques Date: Thu, 11 May 2017 12:29:08 +0200 Subject: [PATCH 24/60] Added missing docstrings. Fixed typos. --- bigchaindb/backend/query.py | 4 ++-- bigchaindb/core.py | 18 +++++++++++++++++- bigchaindb/models.py | 22 +++++++++++++++++++--- tests/test_block_model.py | 2 +- 4 files changed, 39 insertions(+), 7 deletions(-) diff --git a/bigchaindb/backend/query.py b/bigchaindb/backend/query.py index 5c37647c..8245fb3d 100644 --- a/bigchaindb/backend/query.py +++ b/bigchaindb/backend/query.py @@ -293,8 +293,8 @@ def get_last_voted_block_id(connection, node_pubkey): node_pubkey (str): base58 encoded public key. Returns: - The last block the node has voted on. If the node didn't cast - any vote then the genesis block is returned. + The last block id the node has voted on. If the node didn't cast + any vote then the genesis block id is returned. """ raise NotImplementedError diff --git a/bigchaindb/core.py b/bigchaindb/core.py index c6ff5608..1cd21222 100644 --- a/bigchaindb/core.py +++ b/bigchaindb/core.py @@ -641,8 +641,24 @@ class Bigchain(object): return self.block_election(block)['status'] def get_assets(self, asset_ids): - # TODO: write docstrings + """ + Return a list of assets that match the asset_ids + + Args: + asset_ids (:obj:`list` of :obj:`str`): A list of asset_ids to + retrieve from the database. + + Returs: + list: The list of assets returned from the database. + """ return backend.query.get_assets(self.connection, asset_ids) def write_assets(self, assets): + """ + Writes a list of assets into the database. + + Args: + assets (:obj:`list` of :obj:`dict`): A list of assets to write to + the database. + """ return backend.query.write_assets(self.connection, assets) diff --git a/bigchaindb/models.py b/bigchaindb/models.py index 53266930..c77c3338 100644 --- a/bigchaindb/models.py +++ b/bigchaindb/models.py @@ -88,8 +88,22 @@ class Transaction(Transaction): @classmethod def from_db(cls, bigchain, tx_dict): - # TODO: write docstring - if tx_dict['operation'] in [Transaction.CREATE, Transaction.CREATE]: + """ + Helper method that reconstructs a transaction dict that was returned + from the database. It checks what asset_id to retrieve, retrieves the + asset from the asset table and reconstructs the transaction. + + Args: + bigchain (:class:`~bigchaindb.Bigchain`): An instance of Bigchain + used to perform database queries. + tx_dict (:obj:`dict`): The transaction dict as returned from the + database. + + Returns: + :class:`~Transaction` + + """ + if tx_dict['operation'] in [Transaction.CREATE, Transaction.GENESIS]: # TODO: Maybe replace this call to a call to get_asset_by_id asset = list(bigchain.get_assets([tx_dict['id']]))[0] asset.pop('id') @@ -317,12 +331,14 @@ class Block(object): def from_db(cls, bigchain, block_dict): """ Helper method that reconstructs a block_dict that was returned from - the database. If checks what asset_ids to retrieve, retrieves the + the database. It checks what asset_ids to retrieve, retrieves the assets from the assets table and reconstructs the block. Args: bigchain (:class:`~bigchaindb.Bigchain`): An instance of Bigchain used to perform database queries. + block_dict(:obj:`dict`): The block dict as returned from the + database. Returns: :class:`~Block` diff --git a/tests/test_block_model.py b/tests/test_block_model.py index f2b307de..0824c40f 100644 --- a/tests/test_block_model.py +++ b/tests/test_block_model.py @@ -281,7 +281,7 @@ class TestBlockModel(object): # decouple assets assets_from_block, block_dict = block.decouple_assets() - # write the assets and block separatedly + # write the assets and block separately b.write_assets(assets_from_block) b.write_block(block) From f3cd43e5a926b2523bcba450ca68871ede41ef0e Mon Sep 17 00:00:00 2001 From: Sylvain Bellemare Date: Thu, 11 May 2017 14:51:51 +0200 Subject: [PATCH 25/60] Make GET /blocks status arg case insensitive --- bigchaindb/web/views/blocks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigchaindb/web/views/blocks.py b/bigchaindb/web/views/blocks.py index 1ea1a28f..2471739d 100644 --- a/bigchaindb/web/views/blocks.py +++ b/bigchaindb/web/views/blocks.py @@ -42,7 +42,7 @@ class BlockListApi(Resource): """ parser = reqparse.RequestParser() parser.add_argument('tx_id', type=str, required=True) - parser.add_argument('status', type=str, + parser.add_argument('status', type=str, case_sensitive=False, choices=[Bigchain.BLOCK_VALID, Bigchain.BLOCK_INVALID, Bigchain.BLOCK_UNDECIDED]) args = parser.parse_args(strict=True) From 9d3eb23abf68deabf40ea643c238928022418f31 Mon Sep 17 00:00:00 2001 From: Sylvain Bellemare Date: Thu, 11 May 2017 16:13:05 +0200 Subject: [PATCH 26/60] Add workaround for hostname of events api link addresses #1465 --- bigchaindb/web/views/base.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/bigchaindb/web/views/base.py b/bigchaindb/web/views/base.py index 0c226d7d..e4ae980b 100644 --- a/bigchaindb/web/views/base.py +++ b/bigchaindb/web/views/base.py @@ -28,4 +28,7 @@ def base_url(): def base_ws_uri(): """Base websocket uri.""" - return 'ws://{host}:{port}'.format(**config['wsserver']) + # TODO Revisit as this is a workaround to address issue + # https://github.com/bigchaindb/bigchaindb/issues/1465. + host = request.environ['HTTP_HOST'].split(':')[0] + return 'ws://{}:{}'.format(host, config['wsserver']['port']) From f08edd5a528510341d618b5534a6179052a410a3 Mon Sep 17 00:00:00 2001 From: vrde Date: Fri, 12 May 2017 14:31:58 +0200 Subject: [PATCH 27/60] Unpin flask-cors, revert extra changes --- bigchaindb/utils.py | 11 ++++++++--- bigchaindb/web/server.py | 1 + setup.py | 2 +- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/bigchaindb/utils.py b/bigchaindb/utils.py index 4dad9377..f87916b7 100644 --- a/bigchaindb/utils.py +++ b/bigchaindb/utils.py @@ -114,11 +114,16 @@ def condition_details_has_owner(condition_details, owner): def output_has_owner(output, owner): - if len(output['public_keys']) > 0: + # TODO + # Check whether it is really necessary to treat the single key case + # differently from the multiple keys case, and why not just use the same + # function for both cases. + if len(output['public_keys']) > 1: return condition_details_has_owner( output['condition']['details'], owner) - return False - # # TODO raise proper exception, e.g. invalid tx payload? + elif len(output['public_keys']) == 1: + return output['condition']['details']['public_key'] == owner + # TODO raise proper exception, e.g. invalid tx payload? def is_genesis_block(block): diff --git a/bigchaindb/web/server.py b/bigchaindb/web/server.py index 16c76b01..539d3f73 100644 --- a/bigchaindb/web/server.py +++ b/bigchaindb/web/server.py @@ -2,6 +2,7 @@ The application is implemented in Flask and runs using Gunicorn. """ + import copy import multiprocessing diff --git a/setup.py b/setup.py index c6a5f63d..ce509518 100644 --- a/setup.py +++ b/setup.py @@ -71,7 +71,7 @@ install_requires = [ 'python-rapidjson==0.0.11', 'logstats>=0.2.1', 'flask>=0.10.1', - 'flask-cors==2.1.2', + 'flask-cors~=2.1.2', 'flask-restful~=0.3.0', 'requests~=2.9', 'gunicorn~=19.0', From ccaae91601d174dbede2102458410597ec14a8ed Mon Sep 17 00:00:00 2001 From: Rodolphe Marques Date: Fri, 12 May 2017 15:22:11 +0200 Subject: [PATCH 28/60] Added full text search support for mongodb. - Create widlcard text index for the assets collection. - Created backend query to to text search on assets collection. - Added and updated tests. --- bigchaindb/backend/mongodb/query.py | 25 ++++++++ bigchaindb/backend/mongodb/schema.py | 5 +- bigchaindb/backend/query.py | 8 +++ tests/backend/mongodb/test_queries.py | 84 +++++++++++++++++++++++++++ tests/backend/mongodb/test_schema.py | 2 +- 5 files changed, 122 insertions(+), 2 deletions(-) diff --git a/bigchaindb/backend/mongodb/query.py b/bigchaindb/backend/mongodb/query.py index 39d99d4a..3d989a3d 100644 --- a/bigchaindb/backend/mongodb/query.py +++ b/bigchaindb/backend/mongodb/query.py @@ -327,3 +327,28 @@ def get_unvoted_blocks(conn, node_pubkey): 'votes': False, '_id': False }} ])) + + +@register_query(MongoDBConnection) +def text_search(conn, search, language='english', case_sensitive=False, + diacritic_sensitive=False, text_score=False, limit=0): + cursor = conn.run( + conn.collection('assets') + .find({'$text': { + '$search': search, + '$language': language, + '$caseSensitive': case_sensitive, + '$diacriticSensitive': diacritic_sensitive}}, + {'score': {'$meta': 'textScore'}, '_id': False}) + .sort([('score', {'$meta': 'textScore'})]) + .limit(limit)) + + if text_score: + return cursor + else: + return (_remove_text_score(asset) for asset in cursor) + + +def _remove_text_score(asset): + asset.pop('score', None) + return asset diff --git a/bigchaindb/backend/mongodb/schema.py b/bigchaindb/backend/mongodb/schema.py index 12b873e0..6c54bfd8 100644 --- a/bigchaindb/backend/mongodb/schema.py +++ b/bigchaindb/backend/mongodb/schema.py @@ -2,7 +2,7 @@ import logging -from pymongo import ASCENDING, DESCENDING +from pymongo import ASCENDING, DESCENDING, TEXT from bigchaindb import backend from bigchaindb.common import exceptions @@ -113,3 +113,6 @@ def create_assets_secondary_index(conn, dbname): conn.conn[dbname]['assets'].create_index('id', name='asset_id', unique=True) + + # full text search index + conn.conn[dbname]['assets'].create_index([('$**', TEXT)], name='text') diff --git a/bigchaindb/backend/query.py b/bigchaindb/backend/query.py index 8245fb3d..705b0306 100644 --- a/bigchaindb/backend/query.py +++ b/bigchaindb/backend/query.py @@ -325,3 +325,11 @@ def get_txids_filtered(connection, asset_id, operation=None): """ raise NotImplementedError + + +@singledispatch +def text_search(conn, search, language='english', case_sensitive=False, + diacritic_sensitive=False, text_score=False, limit=0): + # TODO: docstring + + raise NotImplementedError diff --git a/tests/backend/mongodb/test_queries.py b/tests/backend/mongodb/test_queries.py index c43c5fa4..9b2ad1cf 100644 --- a/tests/backend/mongodb/test_queries.py +++ b/tests/backend/mongodb/test_queries.py @@ -464,3 +464,87 @@ def test_get_assets(): assert cursor.count() == 2 assert list(cursor.sort('id', pymongo.ASCENDING)) == assets[::2] + + +def test_text_search(): + from bigchaindb.backend import connect, query + conn = connect() + + # Example data and tests cases taken from the mongodb documentation + # https://docs.mongodb.com/manual/reference/operator/query/text/ + assets = [ + {'id': 1, 'subject': 'coffee', 'author': 'xyz', 'views': 50}, + {'id': 2, 'subject': 'Coffee Shopping', 'author': 'efg', 'views': 5}, + {'id': 3, 'subject': 'Baking a cake', 'author': 'abc', 'views': 90}, + {'id': 4, 'subject': 'baking', 'author': 'xyz', 'views': 100}, + {'id': 5, 'subject': 'Café Con Leche', 'author': 'abc', 'views': 200}, + {'id': 6, 'subject': 'Сырники', 'author': 'jkl', 'views': 80}, + {'id': 7, 'subject': 'coffee and cream', 'author': 'efg', 'views': 10}, + {'id': 8, 'subject': 'Cafe con Leche', 'author': 'xyz', 'views': 10} + ] + + # insert the assets + conn.db.assets.insert_many(deepcopy(assets), ordered=False) + + # test search single word + assert list(query.text_search(conn, 'coffee')) == [ + {'id': 1, 'subject': 'coffee', 'author': 'xyz', 'views': 50}, + {'id': 2, 'subject': 'Coffee Shopping', 'author': 'efg', 'views': 5}, + {'id': 7, 'subject': 'coffee and cream', 'author': 'efg', 'views': 10}, + ] + + # match any of the search terms + assert list(query.text_search(conn, 'bake coffee cake')) == [ + {'author': 'abc', 'id': 3, 'subject': 'Baking a cake', 'views': 90}, + {'author': 'xyz', 'id': 1, 'subject': 'coffee', 'views': 50}, + {'author': 'xyz', 'id': 4, 'subject': 'baking', 'views': 100}, + {'author': 'efg', 'id': 2, 'subject': 'Coffee Shopping', 'views': 5}, + {'author': 'efg', 'id': 7, 'subject': 'coffee and cream', 'views': 10} + ] + + # search for a phrase + assert list(query.text_search(conn, '\"coffee shop\"')) == [ + {'id': 2, 'subject': 'Coffee Shopping', 'author': 'efg', 'views': 5}, + ] + + # exclude documents that contain a term + assert list(query.text_search(conn, 'coffee -shop')) == [ + {'id': 1, 'subject': 'coffee', 'author': 'xyz', 'views': 50}, + {'id': 7, 'subject': 'coffee and cream', 'author': 'efg', 'views': 10}, + ] + + # search different language + assert list(query.text_search(conn, 'leche', language='es')) == [ + {'id': 5, 'subject': 'Café Con Leche', 'author': 'abc', 'views': 200}, + {'id': 8, 'subject': 'Cafe con Leche', 'author': 'xyz', 'views': 10} + ] + + # case and diacritic insensitive search + assert list(query.text_search(conn, 'сы́рники CAFÉS')) == [ + {'id': 6, 'subject': 'Сырники', 'author': 'jkl', 'views': 80}, + {'id': 5, 'subject': 'Café Con Leche', 'author': 'abc', 'views': 200}, + {'id': 8, 'subject': 'Cafe con Leche', 'author': 'xyz', 'views': 10} + ] + + # case sensitive search + assert list(query.text_search(conn, 'Coffee', case_sensitive=True)) == [ + {'id': 2, 'subject': 'Coffee Shopping', 'author': 'efg', 'views': 5}, + ] + + # diacritic sensitive search + assert list(query.text_search(conn, 'CAFÉ', diacritic_sensitive=True)) == [ + {'id': 5, 'subject': 'Café Con Leche', 'author': 'abc', 'views': 200}, + ] + + # return text score + assert list(query.text_search(conn, 'coffee', text_score=True)) == [ + {'id': 1, 'subject': 'coffee', 'author': 'xyz', 'views': 50, 'score': 1.0}, + {'id': 2, 'subject': 'Coffee Shopping', 'author': 'efg', 'views': 5, 'score': 0.75}, + {'id': 7, 'subject': 'coffee and cream', 'author': 'efg', 'views': 10, 'score': 0.75}, + ] + + # limit search result + assert list(query.text_search(conn, 'coffee', limit=2)) == [ + {'id': 1, 'subject': 'coffee', 'author': 'xyz', 'views': 50}, + {'id': 2, 'subject': 'Coffee Shopping', 'author': 'efg', 'views': 5}, + ] diff --git a/tests/backend/mongodb/test_schema.py b/tests/backend/mongodb/test_schema.py index e3b320bd..e11dbfe8 100644 --- a/tests/backend/mongodb/test_schema.py +++ b/tests/backend/mongodb/test_schema.py @@ -33,7 +33,7 @@ def test_init_creates_db_tables_and_indexes(): assert sorted(indexes) == ['_id_', 'block_and_voter'] indexes = conn.conn[dbname]['assets'].index_information().keys() - assert sorted(indexes) == ['_id_', 'asset_id'] + assert sorted(indexes) == ['_id_', 'asset_id', 'text'] def test_init_database_fails_if_db_exists(): From edbe5deb466de8c66fb838d6f990b3df10460876 Mon Sep 17 00:00:00 2001 From: Troy McConaghy Date: Mon, 15 May 2017 15:25:01 +0200 Subject: [PATCH 29/60] docs: moved old cloud depl. templates to Appendices & edited intros --- .../azure-quickstart-template.md | 2 +- docs/server/source/appendices/index.rst | 3 +++ .../template-ansible.md | 4 ++-- .../template-terraform-aws.md | 4 ++-- .../cloud-deployment-templates/index.rst | 18 +++++++++--------- docs/server/source/index.rst | 4 ++-- 6 files changed, 19 insertions(+), 16 deletions(-) rename docs/server/source/{cloud-deployment-templates => appendices}/azure-quickstart-template.md (93%) rename docs/server/source/{cloud-deployment-templates => appendices}/template-ansible.md (94%) rename docs/server/source/{cloud-deployment-templates => appendices}/template-terraform-aws.md (95%) diff --git a/docs/server/source/cloud-deployment-templates/azure-quickstart-template.md b/docs/server/source/appendices/azure-quickstart-template.md similarity index 93% rename from docs/server/source/cloud-deployment-templates/azure-quickstart-template.md rename to docs/server/source/appendices/azure-quickstart-template.md index 1bf35a31..59f52fd3 100644 --- a/docs/server/source/cloud-deployment-templates/azure-quickstart-template.md +++ b/docs/server/source/appendices/azure-quickstart-template.md @@ -1,6 +1,6 @@ # Azure Quickstart Template -If you didn't read the introduction to the [cloud deployment templates](index.html), please do that now. The main point is that they're not for deploying a production node; they can be used as a starting point. +This page outlines how to run a single BigchainDB node on the Microsoft Azure public cloud, with RethinkDB as the database backend. It uses an Azure Quickstart Template. That template is dated because we now recommend using MongoDB instead of RethinkDB. That's why we moved this page to the Appendices. Note: There was an Azure quickstart template in the `blockchain` directory of Microsoft's `Azure/azure-quickstart-templates` repository on GitHub. It's gone now; it was replaced by the one described here. diff --git a/docs/server/source/appendices/index.rst b/docs/server/source/appendices/index.rst index 1c969c05..f5931e64 100755 --- a/docs/server/source/appendices/index.rst +++ b/docs/server/source/appendices/index.rst @@ -18,6 +18,9 @@ Appendices backend commands aws-setup + template-terraform-aws + template-ansible + azure-quickstart-template generate-key-pair-for-ssh firewall-notes ntp-notes diff --git a/docs/server/source/cloud-deployment-templates/template-ansible.md b/docs/server/source/appendices/template-ansible.md similarity index 94% rename from docs/server/source/cloud-deployment-templates/template-ansible.md rename to docs/server/source/appendices/template-ansible.md index f296a2cf..508d0555 100644 --- a/docs/server/source/cloud-deployment-templates/template-ansible.md +++ b/docs/server/source/appendices/template-ansible.md @@ -1,9 +1,9 @@ # Template: Ansible Playbook to Run a BigchainDB Node on an Ubuntu Machine -If you didn't read the introduction to the [cloud deployment templates](index.html), please do that now. The main point is that they're not for deploying a production node; they can be used as a starting point. - This page explains how to use [Ansible](https://www.ansible.com/) to install, configure and run all the software needed to run a one-machine BigchainDB node on a server running Ubuntu 16.04. +**Note: We're not actively maintaining the associated Ansible files (e.g. playbooks). They are RethinkDB-specific, even though we now recommend using MongoDB. You may find the old Ansible stuff useful nevertheless, which is why we moved this page to the Appendices rather than deleting it.** + ## Install Ansible diff --git a/docs/server/source/cloud-deployment-templates/template-terraform-aws.md b/docs/server/source/appendices/template-terraform-aws.md similarity index 95% rename from docs/server/source/cloud-deployment-templates/template-terraform-aws.md rename to docs/server/source/appendices/template-terraform-aws.md index d4a22e83..055a5ee3 100644 --- a/docs/server/source/cloud-deployment-templates/template-terraform-aws.md +++ b/docs/server/source/appendices/template-terraform-aws.md @@ -1,8 +1,8 @@ # Template: Using Terraform to Provision an Ubuntu Machine on AWS -If you didn't read the introduction to the [cloud deployment templates](index.html), please do that now. The main point is that they're not for deploying a production node; they can be used as a starting point. +This page explains a way to use [Terraform](https://www.terraform.io/) to provision an Ubuntu machine (i.e. an EC2 instance with Ubuntu 16.04) and other resources on [AWS](https://aws.amazon.com/). That machine can then be used to host a one-machine BigchainDB node, for example. -This page explains a way to use [Terraform](https://www.terraform.io/) to provision an Ubuntu machine (i.e. an EC2 instance with Ubuntu 16.04) and other resources on [AWS](https://aws.amazon.com/). That machine can then be used to host a one-machine BigchainDB node. +**Note: We're not actively maintaining the associated Terraform files. You may find them useful nevertheless, which is why we moved this page to the Appendices rather than deleting it.** ## Install Terraform diff --git a/docs/server/source/cloud-deployment-templates/index.rst b/docs/server/source/cloud-deployment-templates/index.rst index 41eec0ed..34a4b28b 100644 --- a/docs/server/source/cloud-deployment-templates/index.rst +++ b/docs/server/source/cloud-deployment-templates/index.rst @@ -1,18 +1,18 @@ -Cloud Deployment Templates -========================== +Production Deployment Template +============================== -We have some "templates" to deploy a basic, working, but bare-bones BigchainDB node on various cloud providers. They should *not* be used as-is to deploy a node for production. They can be used as a starting point. +This section outlines how *we* deploy production BigchainDB nodes and clusters +on Microsoft Azure +using Kubernetes. +We improve it constantly. +You may choose to use it as a template or reference for your own deployment, +but *we make no claim that it is suitable for your purposes*. +Feel free change things to suit your needs or preferences. -You don't have to use the tools we use in the templates. You can use whatever tools you prefer. - -If you find the cloud deployment templates for nodes helpful, then you may also be interested in our scripts for :doc:`deploying a testing cluster on AWS <../clusters-feds/aws-testing-cluster>` (documented in the Clusters section). .. toctree:: :maxdepth: 1 - template-terraform-aws - template-ansible - azure-quickstart-template template-kubernetes-azure node-on-kubernetes add-node-on-kubernetes diff --git a/docs/server/source/index.rst b/docs/server/source/index.rst index bf45aca0..0dee9174 100644 --- a/docs/server/source/index.rst +++ b/docs/server/source/index.rst @@ -7,14 +7,14 @@ BigchainDB Server Documentation ← Back to All BigchainDB Docs introduction quickstart - cloud-deployment-templates/index production-nodes/index + clusters-feds/index + cloud-deployment-templates/index dev-and-test/index server-reference/index http-client-server-api websocket-event-stream-api drivers-clients/index - clusters-feds/index data-models/index schema/transaction schema/vote From b8407d603adeaf6c89b7b2c92a6b90a8cb8a4b8f Mon Sep 17 00:00:00 2001 From: "krish7919 (Krish)" Date: Wed, 10 May 2017 14:58:05 +0200 Subject: [PATCH 30/60] Deploy events API on k8s over on HTTP --- .../source/cloud-deployment-templates/first-node.rst | 9 +++++++++ k8s/bigchaindb/bigchaindb-dep.yaml | 10 +++++++++- k8s/bigchaindb/bigchaindb-svc.yaml | 3 +++ k8s/nginx/nginx-dep.yaml | 12 ++++++++++-- k8s/nginx/nginx-svc.yaml | 4 ++++ k8s/toolbox/Dockerfile | 5 +++-- 6 files changed, 38 insertions(+), 5 deletions(-) diff --git a/docs/server/source/cloud-deployment-templates/first-node.rst b/docs/server/source/cloud-deployment-templates/first-node.rst index 9130696a..0ba0d31b 100644 --- a/docs/server/source/cloud-deployment-templates/first-node.rst +++ b/docs/server/source/cloud-deployment-templates/first-node.rst @@ -4,6 +4,7 @@ First Node or Bootstrap Node Setup This document is a work in progress and will evolve over time to include security, websocket and other settings. + Step 1: Set Up the Cluster -------------------------- @@ -421,8 +422,12 @@ Step 17. Verify that the Cluster is Correctly Set Up nslookup bdb-instance-0 dig +noall +answer _bdb-port._tcp.bdb-instance-0.default.svc.cluster.local SRV + + dig +noall +answer _bdb-ws-port._tcp.bdb-instance-0.default.svc.cluster.local SRV curl -X GET http://bdb-instance-0:9984 + + wsc ws://bdb-instance-0:9985/api/v1/streams/valid_tx * Verify NGINX instance @@ -435,12 +440,16 @@ Step 17. Verify that the Cluster is Correctly Set Up curl -X GET http://ngx-instance-0:27017 # results in curl: (56) Recv failure: Connection reset by peer dig +noall +answer _ngx-public-bdb-port._tcp.ngx-instance-0.default.svc.cluster.local SRV + + dig +noall +answer _ngx-public-ws-port._tcp.ngx-instance-0.default.svc.cluster.local SRV * If you have run the vanilla NGINX instance, run .. code:: bash curl -X GET http://ngx-instance-0:80 + + wsc ws://ngx-instance-0:81/api/v1/streams/valid_tx * If you have the OpenResty NGINX + 3scale instance, run diff --git a/k8s/bigchaindb/bigchaindb-dep.yaml b/k8s/bigchaindb/bigchaindb-dep.yaml index b8550249..1e8ca56d 100644 --- a/k8s/bigchaindb/bigchaindb-dep.yaml +++ b/k8s/bigchaindb/bigchaindb-dep.yaml @@ -35,6 +35,10 @@ spec: value: bigchain - name: BIGCHAINDB_SERVER_BIND value: 0.0.0.0:9984 + - name: BIGCHAINDB_WSSERVER_HOST + value: 0.0.0.0 + - name: BIGCHAINDB_WSSERVER_PORT + value: "9985" - name: BIGCHAINDB_KEYPAIR_PUBLIC value: "" - name: BIGCHAINDB_KEYPAIR_PRIVATE @@ -54,7 +58,11 @@ spec: - containerPort: 9984 hostPort: 9984 name: bdb-port - protocol: TCP + protocol: TCP + - containerPort: 9985 + hostPort: 9985 + name: bdb-ws-port + protocol: TCP resources: limits: cpu: 200m diff --git a/k8s/bigchaindb/bigchaindb-svc.yaml b/k8s/bigchaindb/bigchaindb-svc.yaml index 9927a92d..272cb533 100644 --- a/k8s/bigchaindb/bigchaindb-svc.yaml +++ b/k8s/bigchaindb/bigchaindb-svc.yaml @@ -12,5 +12,8 @@ spec: - port: 9984 targetPort: 9984 name: bdb-port + - port: 9985 + targetPort: 9985 + name: bdb-ws-port type: ClusterIP clusterIP: None diff --git a/k8s/nginx/nginx-dep.yaml b/k8s/nginx/nginx-dep.yaml index 684ae552..0aad0b2d 100644 --- a/k8s/nginx/nginx-dep.yaml +++ b/k8s/nginx/nginx-dep.yaml @@ -43,15 +43,23 @@ spec: configMapKeyRef: name: mongodb-whitelist key: allowed-hosts + - name: BIGCHAINDB_WS_FRONTEND_PORT + value: "81" + - name: BIGCHAINDB_WS_BACKEND_PORT + value: "9985" ports: - containerPort: 27017 hostPort: 27017 name: public-mdb-port - protocol: TCP + protocol: TCP - containerPort: 80 hostPort: 80 name: public-bdb-port - protocol: TCP + protocol: TCP + - containerPort: 81 + hostPort: 81 + name: public-ws-port + protocol: TCP resources: limits: cpu: 200m diff --git a/k8s/nginx/nginx-svc.yaml b/k8s/nginx/nginx-svc.yaml index 8b0cded4..b9d8bcaf 100644 --- a/k8s/nginx/nginx-svc.yaml +++ b/k8s/nginx/nginx-svc.yaml @@ -21,4 +21,8 @@ spec: targetPort: 80 name: ngx-public-bdb-port protocol: TCP + - port: 81 + targetPort: 81 + name: ngx-public-ws-port + protocol: TCP type: LoadBalancer diff --git a/k8s/toolbox/Dockerfile b/k8s/toolbox/Dockerfile index c9adfb5e..2beeb9d1 100644 --- a/k8s/toolbox/Dockerfile +++ b/k8s/toolbox/Dockerfile @@ -7,9 +7,10 @@ FROM alpine:3.5 LABEL maintainer "dev@bigchaindb.com" WORKDIR / RUN apk add --no-cache --update curl bind-tools python3-dev g++ \ - libffi-dev make vim git \ + libffi-dev make vim git nodejs \ && pip3 install ipython \ && git clone https://github.com/bigchaindb/bigchaindb-driver \ && cd bigchaindb-driver \ - && pip3 install -e . + && pip3 install -e . \ + && npm install -g wsc ENTRYPOINT ["/bin/sh"] From 5ae8c8147455eedd22bf5b08ddca056cfce9d38b Mon Sep 17 00:00:00 2001 From: vrde Date: Thu, 18 May 2017 11:36:24 +0200 Subject: [PATCH 31/60] Use "allow_headers" keyword to initialize CORS Fix #1478 --- bigchaindb/web/server.py | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bigchaindb/web/server.py b/bigchaindb/web/server.py index 539d3f73..3c33a33a 100644 --- a/bigchaindb/web/server.py +++ b/bigchaindb/web/server.py @@ -62,7 +62,7 @@ def create_app(*, debug=False, threads=4): app = Flask(__name__) CORS(app, - headers=( + allow_headers=( 'x-requested-with', 'content-type', 'accept', diff --git a/setup.py b/setup.py index ce509518..55543ea3 100644 --- a/setup.py +++ b/setup.py @@ -71,7 +71,7 @@ install_requires = [ 'python-rapidjson==0.0.11', 'logstats>=0.2.1', 'flask>=0.10.1', - 'flask-cors~=2.1.2', + 'flask-cors~=3.0.0', 'flask-restful~=0.3.0', 'requests~=2.9', 'gunicorn~=19.0', From 3995e22d4afaa4d60848f62cd6b635bdaafedabf Mon Sep 17 00:00:00 2001 From: vrde Date: Thu, 18 May 2017 13:08:46 +0200 Subject: [PATCH 32/60] Fail if config file not found --- bigchaindb/config_utils.py | 5 ++++- tests/commands/test_commands.py | 6 ++++-- tests/conftest.py | 2 +- tests/test_config_utils.py | 12 ++++++++++++ 4 files changed, 21 insertions(+), 4 deletions(-) diff --git a/bigchaindb/config_utils.py b/bigchaindb/config_utils.py index 5a72a7d6..57d10f74 100644 --- a/bigchaindb/config_utils.py +++ b/bigchaindb/config_utils.py @@ -238,7 +238,10 @@ def autoconfigure(filename=None, config=None, force=False): try: newconfig = update(newconfig, file_config(filename=filename)) except FileNotFoundError as e: - logger.warning('Cannot find config file `%s`.' % e.filename) + if filename: + raise + else: + logger.info('Cannot find config file `%s`.' % e.filename) # override configuration with env variables newconfig = env_config(newconfig) diff --git a/tests/commands/test_commands.py b/tests/commands/test_commands.py index 37079ddd..b50a2a67 100644 --- a/tests/commands/test_commands.py +++ b/tests/commands/test_commands.py @@ -88,11 +88,12 @@ def test_bigchain_show_config(capsys): assert output_config == config +@pytest.mark.usefixtures('ignore_local_config_file') def test_bigchain_export_my_pubkey_when_pubkey_set(capsys, monkeypatch): from bigchaindb import config from bigchaindb.commands.bigchaindb import run_export_my_pubkey - args = Namespace(config='dummy') + args = Namespace(config=None) # so in run_export_my_pubkey(args) below, # filename=args.config='dummy' is passed to autoconfigure(). # We just assume autoconfigure() works and sets @@ -107,11 +108,12 @@ def test_bigchain_export_my_pubkey_when_pubkey_set(capsys, monkeypatch): assert 'Charlie_Bucket' in lines +@pytest.mark.usefixtures('ignore_local_config_file') def test_bigchain_export_my_pubkey_when_pubkey_not_set(monkeypatch): from bigchaindb import config from bigchaindb.commands.bigchaindb import run_export_my_pubkey - args = Namespace(config='dummy') + args = Namespace(config=None) monkeypatch.setitem(config['keypair'], 'public', None) # assert that run_export_my_pubkey(args) raises SystemExit: with pytest.raises(SystemExit) as exc_info: diff --git a/tests/conftest.py b/tests/conftest.py index 26beac11..d60b4511 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -199,7 +199,7 @@ def _genesis(_bdb, genesis_block): @pytest.fixture def ignore_local_config_file(monkeypatch): def mock_file_config(filename=None): - raise FileNotFoundError() + return {} monkeypatch.setattr('bigchaindb.config_utils.file_config', mock_file_config) diff --git a/tests/test_config_utils.py b/tests/test_config_utils.py index bb445d83..88c3431e 100644 --- a/tests/test_config_utils.py +++ b/tests/test_config_utils.py @@ -257,6 +257,18 @@ def test_autoconfigure_env_precedence(monkeypatch): assert bigchaindb.config['server']['bind'] == 'localhost:9985' +def test_autoconfigure_explicit_file(monkeypatch): + from bigchaindb import config_utils + + def file_config(*args, **kwargs): + raise FileNotFoundError() + + monkeypatch.setattr('bigchaindb.config_utils.file_config', file_config) + + with pytest.raises(FileNotFoundError): + config_utils.autoconfigure(filename='autoexec.bat') + + def test_update_config(monkeypatch): import bigchaindb from bigchaindb import config_utils From cab2ea11abe4f4e0455667b203f9d4216d79a56d Mon Sep 17 00:00:00 2001 From: Troy McConaghy Date: Thu, 18 May 2017 14:39:23 +0200 Subject: [PATCH 33/60] use default mongodb replica set name when run mongod for tests --- tests/README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/README.md b/tests/README.md index d0e2da52..e6de82c9 100644 --- a/tests/README.md +++ b/tests/README.md @@ -27,8 +27,7 @@ BigchainDB from source. The [`CONTRIBUTING.md` file](../CONTRIBUTING.md) has instructions for how to do that. Next, make sure you have RethinkDB or MongoDB running in the background. You -can run RethinkDB using `rethinkdb --daemon` or MongoDB using `mongod ---replSet=rs0`. +can run RethinkDB using `rethinkdb --daemon` or MongoDB using `mongod --replSet=bigchain-rs`. The `pytest` command has many options. If you want to learn about all the things you can do with pytest, see [the pytest From 780a9c9bf5bd7390c530124e89f2ba73f6ed743d Mon Sep 17 00:00:00 2001 From: Troy McConaghy Date: Thu, 18 May 2017 14:41:16 +0200 Subject: [PATCH 34/60] Quickstart docs: tell them to run MongoDB using 'sudo mongod ...' --- docs/server/source/quickstart.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/server/source/quickstart.md b/docs/server/source/quickstart.md index 2eae07f0..5c2b0500 100644 --- a/docs/server/source/quickstart.md +++ b/docs/server/source/quickstart.md @@ -8,7 +8,7 @@ A. Install MongoDB as the database backend. (There are other options but you can B. Run MongoDB. Open a Terminal and run the command: ```text -$ mongod --replSet=bigchain-rs +$ sudo mongod --replSet=bigchain-rs ``` C. Ubuntu 16.04 already has Python 3.5, so you don't need to install it, but you do need to install some other things: From 69cafee156d088ef4de0fca0fc4dfbc53ecb3678 Mon Sep 17 00:00:00 2001 From: Troy McConaghy Date: Wed, 17 May 2017 15:20:20 +0200 Subject: [PATCH 35/60] Add steps to Release_Process.md, to update Docker image version for k8s --- Release_Process.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Release_Process.md b/Release_Process.md index e4a988a1..be4c448a 100644 --- a/Release_Process.md +++ b/Release_Process.md @@ -10,6 +10,7 @@ that [major version 0.x does not export a stable API](http://semver.org/#spec-it A minor release is preceeded by a feature freeze and created from the 'master' branch. This is a summary of the steps we go through to release a new minor version of BigchainDB Server. 1. Update the `CHANGELOG.md` file in master +1. In `k8s/bigchaindb/bigchaindb-dep.yaml`, find the line of the form `image: bigchaindb/bigchaindb:0.8.1` and change the version number to the new version number, e.g. `0.9.0`. (This is the Docker image that Kubernetes should pull from Docker Hub.) Commit that change to master 1. Create and checkout a new branch for the minor release, named after the minor version, without a preceeding 'v', e.g. `git checkout -b 0.9` (*not* 0.9.0, this new branch will be for e.g. 0.9.0, 0.9.1, 0.9.2, etc. each of which will be identified by a tagged commit) 1. In `bigchaindb/version.py`, update `__version__` and `__short_version__`, e.g. to `0.9` and `0.9.0` (with no `.dev` on the end) 1. Commit that change, and push the new branch to GitHub @@ -26,8 +27,11 @@ A patch release is similar to a minor release, but piggybacks on an existing min 1. Check out the minor release branch, e.g. `0.9` 1. Apply the changes you want, e.g. using `git cherry-pick`. 1. Update the `CHANGELOG.md` file -1. Increment the patch version in `bigchaindb/version.py`, e.g. "0.9.1" -1. Commit that change, and push the updated branch to GitHub +1. Increment the patch version in `bigchaindb/version.py`, e.g. `0.9.1` +1. Commit that change +1. In `k8s/bigchaindb/bigchaindb-dep.yaml`, find the line of the form `image: bigchaindb/bigchaindb:0.9.0` and change the version number to the new version number, e.g. `0.9.1`. (This is the Docker image that Kubernetes should pull from Docker Hub.) +1. Commit that change +1. Push the updated minor release branch to GitHub 1. Follow steps outlined in [Common Steps](#common-steps) 1. Cherry-pick the `CHANGELOG.md` update commit (made above) to the `master` branch From c6133c827d51edb5c10a8577b15b37ecf982b5d2 Mon Sep 17 00:00:00 2001 From: Troy McConaghy Date: Wed, 17 May 2017 14:29:37 +0200 Subject: [PATCH 36/60] docs: updated link to JS/nodejs driver --- docs/server/source/drivers-clients/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/server/source/drivers-clients/index.rst b/docs/server/source/drivers-clients/index.rst index 0bfde7ad..ef749d55 100644 --- a/docs/server/source/drivers-clients/index.rst +++ b/docs/server/source/drivers-clients/index.rst @@ -20,7 +20,7 @@ Community-Driven Libraries and Tools Some of these projects are a work in progress, but may still be useful. -* `Javascript transaction builder `_ +* `JavaScript / Node.js driver `_ * `Haskell transaction builder `_ * `Go driver `_ * `Java driver `_ From b6ec3e5f5cfe794c84e26c6d538d8a4716eb4207 Mon Sep 17 00:00:00 2001 From: Krish Date: Fri, 19 May 2017 11:18:44 +0200 Subject: [PATCH 37/60] Mongodb TLS (#1456) * Support for secure TLS communication in MongoDB, MongoDB Monitoring Agent and MongoDB Backup Agent - Move from Golang to Bash for entrypoint program - Update image tag to 2.0 for Backup and Monitoring Agents and to 3.4.4 for MongoDB - Add documentation * changed title & rewrote Step 1 of workflow.rst * copy-edited ca-installation.rst * copy-edited & modified structure of workflow.rst * moved repeated Easy-RSA install & config docs to new page * edited the sentences describing the Easy-RSA dirs * copy-edited the page about generating server certificate * copy-edited the page about generating client certificate * renamed page to 'How to Set Up a Self-Signed Certificate Authority' * copy-edited page about how to revoke a certificate * Comments on how to uniquely name all instances in the cluster * Added comments about the other questions when setting up a CA * Added note about one Agent Api Key per Cloud Manager backup * docs: clarified instructions for generating server CSR * docs: added back 'from your PKI infrastructure' * docs: fixed step & added step re/ FQDNs & certs in workflow.rst * docs: added note re/ the Distinguished Name * Update docs for env vars setup * docs: added tip: how to get help with the easyrsa command --- .../ca-installation.rst | 89 ++++++++++ .../client-tls-certificate.rst | 77 +++++++++ .../cloud-deployment-templates/easy-rsa.rst | 84 ++++++++++ .../cloud-deployment-templates/first-node.rst | 2 +- .../cloud-deployment-templates/index.rst | 6 + .../revoke-tls-certificate.rst | 42 +++++ .../server-tls-certificate.rst | 92 +++++++++++ .../template-kubernetes-azure.rst | 5 +- .../cloud-deployment-templates/workflow.rst | 123 ++++++++++++++ .../container/docker_build_and_push.bash | 4 +- .../mongodb_backup_agent_entrypoint.bash | 21 ++- .../container/docker_build_and_push.bash | 4 +- .../mongodb_mon_agent_entrypoint.bash | 15 +- k8s/mongodb/container/Dockerfile | 13 +- k8s/mongodb/container/Makefile | 51 ------ k8s/mongodb/container/README.md | 44 +++-- k8s/mongodb/container/mongod.conf.template | 23 ++- k8s/mongodb/container/mongod_entrypoint.bash | 91 +++++++++++ .../mongod_entrypoint/mongod_entrypoint.go | 154 ------------------ 19 files changed, 698 insertions(+), 242 deletions(-) create mode 100644 docs/server/source/cloud-deployment-templates/ca-installation.rst create mode 100644 docs/server/source/cloud-deployment-templates/client-tls-certificate.rst create mode 100644 docs/server/source/cloud-deployment-templates/easy-rsa.rst create mode 100644 docs/server/source/cloud-deployment-templates/revoke-tls-certificate.rst create mode 100644 docs/server/source/cloud-deployment-templates/server-tls-certificate.rst create mode 100644 docs/server/source/cloud-deployment-templates/workflow.rst delete mode 100644 k8s/mongodb/container/Makefile create mode 100755 k8s/mongodb/container/mongod_entrypoint.bash delete mode 100644 k8s/mongodb/container/mongod_entrypoint/mongod_entrypoint.go diff --git a/docs/server/source/cloud-deployment-templates/ca-installation.rst b/docs/server/source/cloud-deployment-templates/ca-installation.rst new file mode 100644 index 00000000..9ea38477 --- /dev/null +++ b/docs/server/source/cloud-deployment-templates/ca-installation.rst @@ -0,0 +1,89 @@ +How to Set Up a Self-Signed Certificate Authority +================================================= + +This page enumerates the steps *we* use to set up a self-signed certificate authority (CA). +This is something that only needs to be done once per cluster, +by the organization managing the cluster, i.e. the CA is for the whole cluster. +We use Easy-RSA. + + +Step 1: Install & Configure Easy-RSA +------------------------------------ + +First create a directory for the CA and cd into it: + +.. code:: bash + + mkdir bdb-cluster-ca + + cd bdb-cluster-ca + +Then :ref:`install and configure Easy-RSA in that directory `. + + +Step 2: Create a Self-Signed CA +------------------------------- + +You can create a self-signed CA +by going to the ``bdb-cluster-ca/easy-rsa-3.0.1/easyrsa3`` directory and using: + +.. code:: bash + + ./easyrsa init-pki + + ./easyrsa build-ca + + +You will be asked to enter a PEM pass phrase for encrypting the ``ca.key`` file. +Make sure to securely store that PEM pass phrase. +If you lose it, you won't be able to add or remove entities from your PKI infrastructure in the future. + +It will ask several other questions. +You can accept all the defaults [in brackets] by pressing Enter. +While ``Easy-RSA CA`` *is* a valid and acceptable Common Name, +you should probably enter a name based on the name of the managing organization, +e.g. ``Omega Ledger CA``. + +Tip: You can get help with the ``easyrsa`` command (and its subcommands) +by using the subcommand ``./easyrsa help`` + + +Step 3: Create an Intermediate CA +--------------------------------- + +TODO(Krish) + +Step 4: Generate a Certificate Revocation List +---------------------------------------------- + +You can generate a Certificate Revocation List (CRL) using: + +.. code:: bash + + ./easyrsa gen-crl + +You will need to run this command every time you revoke a certificate and the +generated ``crl.pem`` needs to be uploaded to your infrastructure to prevent +the revoked certificate from being used again. + + +Step 5: Secure the CA +--------------------- + +The security of your infrastructure depends on the security of this CA. + +- Ensure that you restrict access to the CA and enable only legitimate and + required people to sign certificates and generate CRLs. + +- Restrict access to the machine where the CA is hosted. + +- Many certificate providers keep the CA offline and use a rotating + intermediate CA to sign and revoke certificates, to mitigate the risk of the + CA getting compromised. + +- In case you want to destroy the machine where you created the CA + (for example, if this was set up on a cloud provider instance), + you can backup the entire ``easyrsa`` directory + to secure storage. You can always restore it to a trusted instance again + during the times when you want to sign or revoke certificates. + Remember to backup the directory after every update. diff --git a/docs/server/source/cloud-deployment-templates/client-tls-certificate.rst b/docs/server/source/cloud-deployment-templates/client-tls-certificate.rst new file mode 100644 index 00000000..60a754a0 --- /dev/null +++ b/docs/server/source/cloud-deployment-templates/client-tls-certificate.rst @@ -0,0 +1,77 @@ +How to Generate a Client Certificate for MongoDB +================================================ + +This page enumerates the steps *we* use +to generate a client certificate +to be used by clients who want to connect to a TLS-secured MongoDB cluster. +We use Easy-RSA. + + +Step 1: Install and Configure Easy-RSA +-------------------------------------- + +First create a directory for the client certificate and cd into it: + +.. code:: bash + + mkdir client-cert + + cd client-cert + +Then :ref:`install and configure Easy-RSA in that directory `. + + +Step 2: Create the Client Private Key and CSR +--------------------------------------------- + +You can create the client private key and certificate signing request (CSR) +by going into the directory ``client-cert/easy-rsa-3.0.1/easyrsa`` +and using: + +.. code:: bash + + ./easyrsa init-pki + + ./easyrsa gen-req bdb-instance-0 nopass + + +You should change ``bdb-instance-0`` to a value based on the client +the certificate is for. + +Tip: You can get help with the ``easyrsa`` command (and its subcommands) +by using the subcommand ``./easyrsa help`` + + +Step 3: Get the Client Certificate Signed +----------------------------------------- + +The CSR file (created in the last step) +should be located in ``pki/reqs/bdb-instance-0.req``. +You need to send it to the organization managing the cluster +so that they can use their CA +to sign the request. +(The managing organization should already have a self-signed CA.) + +If you are the admin of the managing organization's self-signed CA, +then you can import the CSR and use Easy-RSA to sign it. For example: + +.. code:: bash + + ./easyrsa import-req bdb-instance-0.req bdb-instance-0 + + ./easyrsa sign-req client bdb-instance-0 + +Once you have signed it, you can send the signed certificate +and the CA certificate back to the requestor. +The files are ``pki/issued/bdb-instance-0.crt`` and ``pki/ca.crt``. + + +Step 4: Generate the Consolidated Client PEM File +------------------------------------------------- + +MongoDB requires a single, consolidated file containing both the public and +private keys. + +.. code:: bash + + cat bdb-instance-0.crt bdb-instance-0.key > bdb-instance-0.pem diff --git a/docs/server/source/cloud-deployment-templates/easy-rsa.rst b/docs/server/source/cloud-deployment-templates/easy-rsa.rst new file mode 100644 index 00000000..50a62cd5 --- /dev/null +++ b/docs/server/source/cloud-deployment-templates/easy-rsa.rst @@ -0,0 +1,84 @@ +How to Install & Configure Easy-RSA +=================================== + +We use +`Easy-RSA version 3 +`_, a +wrapper over complex ``openssl`` commands. +`Easy-RSA is available on GitHub `_ and licensed under GPLv2. + + +Step 1: Install Easy-RSA Dependencies +------------------------------------- + +The only dependency for Easy-RSA v3 is ``openssl``, +which is available from the ``openssl`` package on Ubuntu and other +Debian-based operating systems, i.e. you can install it using: + +.. code:: bash + + sudo apt-get update + + sudo apt-get install openssl + + +Step 2: Install Easy-RSA +------------------------ + +Make sure you're in the directory where you want Easy-RSA to live, +then download it and extract it within that directory: + +.. code:: bash + + wget https://github.com/OpenVPN/easy-rsa/archive/3.0.1.tar.gz + + tar xzvf 3.0.1.tar.gz + + rm 3.0.1.tar.gz + +There should now be a directory named ``easy-rsa-3.0.1`` +in your current directory. + + +Step 3: Customize the Easy-RSA Configuration +-------------------------------------------- + +We now create a config file named ``vars`` +by copying the existing ``vars.example`` file +and then editing it. +You should change the +country, province, city, org and email +to the correct values for you. +(Note: The country, province, city, org and email are part of +the `Distinguished Name `_ (DN).) +The comments in the file explain what the variables mean. + +.. code:: bash + + cd easy-rsa-3.0.1/easyrsa3 + + cp vars.example vars + + echo 'set_var EASYRSA_DN "org"' >> vars + echo 'set_var EASYRSA_REQ_OU "IT"' >> vars + echo 'set_var EASYRSA_KEY_SIZE 4096' >> vars + + echo 'set_var EASYRSA_REQ_COUNTRY "DE"' >> vars + echo 'set_var EASYRSA_REQ_PROVINCE "Berlin"' >> vars + echo 'set_var EASYRSA_REQ_CITY "Berlin"' >> vars + echo 'set_var EASYRSA_REQ_ORG "BigchainDB GmbH"' >> vars + echo 'set_var EASYRSA_REQ_EMAIL "dev@bigchaindb.com"' >> vars + + +Step 4: Maybe Edit x509-types/server +------------------------------------ + +.. warning:: + + Only do this step if you are setting up a self-signed CA + or creating a server/member certificate. + +Edit the file ``x509-types/server`` and change +``extendedKeyUsage = serverAuth`` to +``extendedKeyUsage = serverAuth,clientAuth``. +See `the MongoDB documentation about x.509 authentication `_ to understand why. diff --git a/docs/server/source/cloud-deployment-templates/first-node.rst b/docs/server/source/cloud-deployment-templates/first-node.rst index 0ba0d31b..298d7ceb 100644 --- a/docs/server/source/cloud-deployment-templates/first-node.rst +++ b/docs/server/source/cloud-deployment-templates/first-node.rst @@ -457,7 +457,7 @@ Step 17. Verify that the Cluster is Correctly Set Up curl -X GET https://ngx-instance-0 - * Check the MongoDB monitoring and backup agent on the MOngoDB Coud Manager portal to verify they are working fine. + * Check the MongoDB monitoring and backup agent on the MongoDB Coud Manager portal to verify they are working fine. * Send some transactions to BigchainDB and verify it's up and running! diff --git a/docs/server/source/cloud-deployment-templates/index.rst b/docs/server/source/cloud-deployment-templates/index.rst index 41eec0ed..af2dc743 100644 --- a/docs/server/source/cloud-deployment-templates/index.rst +++ b/docs/server/source/cloud-deployment-templates/index.rst @@ -10,6 +10,11 @@ If you find the cloud deployment templates for nodes helpful, then you may also .. toctree:: :maxdepth: 1 + workflow + ca-installation + server-tls-certificate + client-tls-certificate + revoke-tls-certificate template-terraform-aws template-ansible azure-quickstart-template @@ -19,3 +24,4 @@ If you find the cloud deployment templates for nodes helpful, then you may also upgrade-on-kubernetes first-node log-analytics + easy-rsa diff --git a/docs/server/source/cloud-deployment-templates/revoke-tls-certificate.rst b/docs/server/source/cloud-deployment-templates/revoke-tls-certificate.rst new file mode 100644 index 00000000..5c566c97 --- /dev/null +++ b/docs/server/source/cloud-deployment-templates/revoke-tls-certificate.rst @@ -0,0 +1,42 @@ +How to Revoke an SSL/TLS Certificate +==================================== + +This page enumerates the steps *we* take to revoke a self-signed SSL/TLS certificate +in a cluster. +It can only be done by someone with access to the self-signed CA +associated with the cluster's managing organization. + +Step 1: Revoke a Certificate +---------------------------- + +Since we used Easy-RSA version 3 to +:ref:`set up the CA `, +we use it to revoke certificates too. + +Go to the following directory (associated with the self-signed CA): +``.../bdb-cluster-ca/easy-rsa-3.0.1/easyrsa3``. +You need to be aware of the file name used to import the certificate using the +``./easyrsa import-req`` before. Run the following command to revoke a +certificate: + +.. code:: bash + + ./easyrsa revoke + +This will update the CA database with the revocation details. +The next step is to use the updated database to issue an up-to-date +certificate revocation list (CRL). + + +Step 2: Generate a New CRL +-------------------------- + +Generate a new CRL for your infrastructure using: + +.. code:: bash + + ./easyrsa gen-crl + +The generated ``crl.pem`` file needs to be uploaded to your infrastructure to +prevent the revoked certificate from being used again. + diff --git a/docs/server/source/cloud-deployment-templates/server-tls-certificate.rst b/docs/server/source/cloud-deployment-templates/server-tls-certificate.rst new file mode 100644 index 00000000..b9cb1a14 --- /dev/null +++ b/docs/server/source/cloud-deployment-templates/server-tls-certificate.rst @@ -0,0 +1,92 @@ +How to Generate a Server Certificate for MongoDB +================================================ + +This page enumerates the steps *we* use to generate a +server certificate for a MongoDB instance. +A server certificate is also referred to as a "member certificate" +in the MongoDB documentation. +We use Easy-RSA. + + +Step 1: Install & Configure Easy–RSA +------------------------------------ + +First create a directory for the server certificate (member cert) and cd into it: + +.. code:: bash + + mkdir member-cert + + cd member-cert + +Then :ref:`install and configure Easy-RSA in that directory `. + + +Step 2: Create the Server Private Key and CSR +--------------------------------------------- + +You can create the server private key and certificate signing request (CSR) +by going into the directory ``member-cert/easy-rsa-3.0.1/easyrsa`` +and using something like: + +.. code:: bash + + ./easyrsa init-pki + + ./easyrsa --req-cn=mdb-instance-0 --subject-alt-name=DNS:localhost,DNS:mdb-instance-0 gen-req mdb-instance-0 nopass + +You must replace the common name (``mdb-instance-0`` above) +with the common name of *your* MongoDB instance +(which should be the same as the hostname of your MongoDB instance). + +You need to provide the ``DNS:localhost`` SAN during certificate generation for +using the ``localhost exception`` in the MongoDB instance. + +All certificates can have this attribute without compromising security as the +``localhost exception`` works only the first time. + +Tip: You can get help with the ``easyrsa`` command (and its subcommands) +by using the subcommand ``./easyrsa help`` + + +Step 3: Get the Server Certificate Signed +----------------------------------------- + +The CSR file (created in the last step) +should be located in ``pki/reqs/mdb-instance-0.req``. +You need to send it to the organization managing the cluster +so that they can use their CA +to sign the request. +(The managing organization should already have a self-signed CA.) + +If you are the admin of the managing organization's self-signed CA, +then you can import the CSR and use Easy-RSA to sign it. For example: + +.. code:: bash + + ./easyrsa import-req mdb-instance-0.req mdb-instance-0 + + ./easyrsa --subject-alt-name=DNS:localhost,DNS:mdb-instance-0 sign-req server mdb-instance-0 + +Once you have signed it, you can send the signed certificate +and the CA certificate back to the requestor. +The files are ``pki/issued/mdb-instance-0.crt`` and ``pki/ca.crt``. + + +Step 4: Generate the Consolidated Server PEM File +------------------------------------------------- + +MongoDB requires a single, consolidated file containing both the public and +private keys. + +.. code:: bash + + cat mdb-instance-0.crt mdb-instance-0.key > mdb-instance-0.pem + + +Step 5: Update the MongoDB Config File +-------------------------------------- + +In the MongoDB configuration file, +set the ``net.ssl.PEMKeyFile`` parameter to the path of the ``mdb-instance-0.pem`` file, +and the ``net.ssl.CAFile`` parameter to the ``ca.crt`` file. diff --git a/docs/server/source/cloud-deployment-templates/template-kubernetes-azure.rst b/docs/server/source/cloud-deployment-templates/template-kubernetes-azure.rst index b967e764..a9e6792c 100644 --- a/docs/server/source/cloud-deployment-templates/template-kubernetes-azure.rst +++ b/docs/server/source/cloud-deployment-templates/template-kubernetes-azure.rst @@ -86,6 +86,7 @@ confuse some software. $ az group create --name --location + Example location names are ``koreacentral`` and ``westeurope``. Finally, you can deploy an ACS using something like: @@ -95,12 +96,14 @@ Finally, you can deploy an ACS using something like: $ az acs create --name \ --resource-group \ --master-count 3 \ - --agent-count 3 \ + --agent-count 2 \ --admin-username ubuntu \ --agent-vm-size Standard_D2_v2 \ --dns-prefix \ --ssh-key-value ~/.ssh/.pub \ --orchestrator-type kubernetes + --debug --output json + There are more options. For help understanding all the options, use the built-in help: diff --git a/docs/server/source/cloud-deployment-templates/workflow.rst b/docs/server/source/cloud-deployment-templates/workflow.rst new file mode 100644 index 00000000..b8aa919f --- /dev/null +++ b/docs/server/source/cloud-deployment-templates/workflow.rst @@ -0,0 +1,123 @@ +Overview +======== + +This page summarizes the steps *we* go through +to set up a production BigchainDB cluster. +We are constantly improving them. +You can modify them to suit your needs. + + +Things the Managing Organization Must Do First +---------------------------------------------- + + +1. Set Up a Self-Signed Certificate Authority +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +We use SSL/TLS and self-signed certificates +for MongoDB authentication (and message encryption). +The certificates are signed by the organization managing the cluster. +If your organization already has a process +for signing certificates +(i.e. an internal self-signed certificate authority [CA]), +then you can skip this step. +Otherwise, your organization must +:ref:`set up its own self-signed certificate authority `. + + +2. Register a Domain and Get an SSL Certificate for It +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The BigchainDB APIs (HTTP API and WebSocket API) should be served using TLS, +so the organization running the cluster +should choose an FQDN for their API (e.g. api.organization-x.com), +register the domain name, +and buy an SSL/TLS certificate for the FQDN. + + +Things Each Node Operator Must Do +--------------------------------- + +☐ Every MongoDB instance in the cluster must have a unique (one-of-a-kind) name. +Ask the organization managing your cluster if they have a standard +way of naming instances in the cluster. +For example, maybe they assign a unique number to each node, +so that if you're operating node 12, your MongoDB instance would be named +``mdb-instance-12``. +Similarly, other instances must also have unique names in the cluster. + +#. Name of the MongoDB instance (``mdb-instance-*``) +#. Name of the BigchainDB instance (``bdb-instance-*``) +#. Name of the NGINX instance (``ngx-instance-*``) +#. Name of the MongoDB monitoring agent instance (``mdb-mon-instance-*``) +#. Name of the MongoDB backup agent instance (``mdb-bak-instance-*``) + + +☐ Every node in a BigchainDB cluster needs its own +BigchainDB keypair (i.e. a public key and corresponding private key). +You can generate a BigchainDB keypair for your node, for example, +using the `BigchainDB Python Driver `_. + +.. code:: python + + from bigchaindb_driver.crypto import generate_keypair + print(generate_keypair()) + + +☐ Share your BigchaindB *public* key with all the other nodes +in the BigchainDB cluster. +Don't share your private key. + + +☐ Get the BigchainDB public keys of all the other nodes in the cluster. +That list of public keys is known as the BigchainDB "keyring." + + +☐ Ask the managing organization +for the FQDN used to serve the BigchainDB APIs +(e.g. ``api.orgname.net`` or ``bdb.clustername.com``). + + +☐ Make up an FQDN for your BigchainDB node (e.g. ``mynode.mycorp.com``). +Make sure you've registered the associated domain name (e.g. ``mycorp.com``), +and have an SSL certificate for the FQDN. +(You can get an SSL certificate from any SSL certificate provider). + + +☐ If the cluster uses 3scale for API authentication, monitoring and billing, +you must ask the managing organization for all relevant 3scale credentials. + + +☐ If the cluster uses MongoDB Cloud Manager for monitoring and backup, +you must ask the managing organization for the ``Agent Api Key``. +(Each Cloud Manager backup will have its own ``Agent Api Key``. +If there's one Cloud Manager backup, +there will be one ``Agent Api Key`` for the whole cluster.) + + +☐ Generate four keys and corresponding certificate signing requests (CSRs): + +#. Server Certificate (a.k.a. Member Certificate) for the MongoDB instance +#. Client Certificate for BigchainDB Server to identify itself to MongoDB +#. Client Certificate for MongoDB Monitoring Agent to identify itself to MongoDB +#. Client Certificate for MongoDB Backup Agent to identify itself to MongoDB + +Ask the managing organization to use its self-signed CA to sign those certificates. + +For help, see the pages: + +* :ref:`How to Generate a Server Certificate for MongoDB` +* :ref:`How to Generate a Client Certificate for MongoDB` + + +☐ :doc:`Deploy a Kubernetes cluster on Azure `. + + +☐ Create the Kubernetes Configuration for this node. +We will use Kubernetes ConfigMaps and Secrets to hold all the information +gathered above. + + +☐ Deploy your BigchainDB node on your Kubernetes cluster. + +TODO: Links to instructions for first-node-in-cluster or second-or-later-node-in-cluster \ No newline at end of file diff --git a/k8s/mongodb-backup-agent/container/docker_build_and_push.bash b/k8s/mongodb-backup-agent/container/docker_build_and_push.bash index e57e58a1..5d1780ea 100755 --- a/k8s/mongodb-backup-agent/container/docker_build_and_push.bash +++ b/k8s/mongodb-backup-agent/container/docker_build_and_push.bash @@ -1,5 +1,5 @@ #!/bin/bash -docker build -t bigchaindb/mongodb-backup-agent:1.0 . +docker build -t bigchaindb/mongodb-backup-agent:2.0 . -docker push bigchaindb/mongodb-backup-agent:1.0 +docker push bigchaindb/mongodb-backup-agent:2.0 diff --git a/k8s/mongodb-backup-agent/container/mongodb_backup_agent_entrypoint.bash b/k8s/mongodb-backup-agent/container/mongodb_backup_agent_entrypoint.bash index ef3895ea..6b663fe9 100755 --- a/k8s/mongodb-backup-agent/container/mongodb_backup_agent_entrypoint.bash +++ b/k8s/mongodb-backup-agent/container/mongodb_backup_agent_entrypoint.bash @@ -5,17 +5,28 @@ set -euo pipefail MONGODB_BACKUP_CONF_FILE=/etc/mongodb-mms/backup-agent.config mms_api_key=`printenv MMS_API_KEY` +ca_crt_path=`printenv CA_CRT_PATH` +backup_crt_path=`printenv BACKUP_PEM_PATH` -if [[ -z "${mms_api_key}" ]]; then +if [[ -z "${mms_api_key}" || \ + -z "${ca_crt_path}" || \ + -z "${backup_crt_path}" ]]; then echo "Invalid environment settings detected. Exiting!" exit 1 fi -sed -i '/mmsApiKey/d' $MONGODB_BACKUP_CONF_FILE -sed -i '/mothership/d' $MONGODB_BACKUP_CONF_FILE +sed -i '/mmsApiKey/d' ${MONGODB_BACKUP_CONF_FILE} +sed -i '/mothership/d' ${MONGODB_BACKUP_CONF_FILE} -echo "mmsApiKey="${mms_api_key} >> $MONGODB_BACKUP_CONF_FILE -echo "mothership=api-backup.eu-west-1.mongodb.com" >> $MONGODB_BACKUP_CONF_FILE +echo "mmsApiKey="${mms_api_key} >> ${MONGODB_BACKUP_CONF_FILE} +echo "mothership=api-backup.eu-west-1.mongodb.com" >> ${MONGODB_BACKUP_CONF_FILE} + +# Append SSL settings to the config file +echo "useSslForAllConnections=true" >> ${MONGODB_BACKUP_CONF_FILE} +echo "sslRequireValidServerCertificates=true" >> ${MONGODB_BACKUP_CONF_FILE} +echo "sslTrustedServerCertificates="${ca_crt_path} >> ${MONGODB_BACKUP_CONF_FILE} +echo "sslClientCertificate="${backup_crt_path} >> ${MONGODB_BACKUP_CONF_FILE} +echo "#sslClientCertificatePassword=" >> ${MONGODB_BACKUP_CONF_FILE} echo "INFO: starting mdb backup..." exec mongodb-mms-backup-agent -c $MONGODB_BACKUP_CONF_FILE diff --git a/k8s/mongodb-monitoring-agent/container/docker_build_and_push.bash b/k8s/mongodb-monitoring-agent/container/docker_build_and_push.bash index d2219b08..caefb6d7 100755 --- a/k8s/mongodb-monitoring-agent/container/docker_build_and_push.bash +++ b/k8s/mongodb-monitoring-agent/container/docker_build_and_push.bash @@ -1,5 +1,5 @@ #!/bin/bash -docker build -t bigchaindb/mongodb-monitoring-agent:1.0 . +docker build -t bigchaindb/mongodb-monitoring-agent:2.0 . -docker push bigchaindb/mongodb-monitoring-agent:1.0 +docker push bigchaindb/mongodb-monitoring-agent:2.0 diff --git a/k8s/mongodb-monitoring-agent/container/mongodb_mon_agent_entrypoint.bash b/k8s/mongodb-monitoring-agent/container/mongodb_mon_agent_entrypoint.bash index 6454c729..9ef96303 100755 --- a/k8s/mongodb-monitoring-agent/container/mongodb_mon_agent_entrypoint.bash +++ b/k8s/mongodb-monitoring-agent/container/mongodb_mon_agent_entrypoint.bash @@ -9,8 +9,12 @@ set -euo pipefail MONGODB_MON_CONF_FILE=/etc/mongodb-mms/monitoring-agent.config mms_api_key=`printenv MMS_API_KEY` +ca_crt_path=`printenv CA_CRT_PATH` +monitoring_crt_path=`printenv MONITORING_PEM_PATH` -if [[ -z "${mms_api_key}" ]]; then +if [[ -z "${mms_api_key}" || \ + -z "${ca_crt_path}" || \ + -z "${monitoring_crt_path}" ]]; then echo "Invalid environment settings detected. Exiting!" exit 1 fi @@ -21,7 +25,14 @@ sed -i '/mmsApiKey/d' $MONGODB_MON_CONF_FILE # Append a new line of the form # mmsApiKey=value_of_MMS_API_KEY -echo "mmsApiKey="${mms_api_key} >> $MONGODB_MON_CONF_FILE +echo "mmsApiKey="${mms_api_key} >> ${MONGODB_MON_CONF_FILE} + +# Append SSL settings to the config file +echo "useSslForAllConnections=true" >> ${MONGODB_MON_CONF_FILE} +echo "sslRequireValidServerCertificates=true" >> ${MONGODB_MON_CONF_FILE} +echo "sslTrustedServerCertificates="${ca_crt_path} >> ${MONGODB_MON_CONF_FILE} +echo "sslClientCertificate="${monitoring_crt_path} >> ${MONGODB_MON_CONF_FILE} +echo "#sslClientCertificatePassword=" >> ${MONGODB_MON_CONF_FILE} # start mdb monitoring agent echo "INFO: starting mdb monitor..." diff --git a/k8s/mongodb/container/Dockerfile b/k8s/mongodb/container/Dockerfile index e9667f95..66b076c7 100644 --- a/k8s/mongodb/container/Dockerfile +++ b/k8s/mongodb/container/Dockerfile @@ -1,12 +1,13 @@ -FROM mongo:3.4.3 +FROM mongo:3.4.4 LABEL maintainer "dev@bigchaindb.com" WORKDIR / RUN apt-get update \ && apt-get -y upgrade \ && apt-get autoremove \ - && apt-get clean -COPY mongod.conf.template /etc/mongod.conf.template -COPY mongod_entrypoint/mongod_entrypoint / -VOLUME /data/db /data/configdb + && apt-get clean \ + && mkdir /mongo-ssl +COPY mongod.conf.template /etc/mongod.conf +COPY mongod_entrypoint.bash / +VOLUME /data/db /data/configdb /mongo-ssl EXPOSE 27017 -ENTRYPOINT ["/mongod_entrypoint"] +ENTRYPOINT ["/mongod_entrypoint.bash"] diff --git a/k8s/mongodb/container/Makefile b/k8s/mongodb/container/Makefile deleted file mode 100644 index 0a3779af..00000000 --- a/k8s/mongodb/container/Makefile +++ /dev/null @@ -1,51 +0,0 @@ -# Targets: -# all: Cleans, formats src files, builds the code, builds the docker image -# clean: Removes the binary and docker image -# format: Formats the src files -# build: Builds the code -# docker: Builds the code and docker image -# push: Push the docker image to Docker hub - -GOCMD=go -GOVET=$(GOCMD) tool vet -GOINSTALL=$(GOCMD) install -GOFMT=gofmt -s -w - -DOCKER_IMAGE_NAME?=bigchaindb/mongodb -DOCKER_IMAGE_TAG?=3.4.3 - -PWD=$(shell pwd) -BINARY_PATH=$(PWD)/mongod_entrypoint/ -BINARY_NAME=mongod_entrypoint -MAIN_FILE = $(BINARY_PATH)/mongod_entrypoint.go -SRC_FILES = $(BINARY_PATH)/mongod_entrypoint.go - -.PHONY: all - -all: clean build docker - -clean: - @echo "removing any pre-built binary"; - -@rm $(BINARY_PATH)/$(BINARY_NAME); - @echo "remove any pre-built docker image"; - -@docker rmi $(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_TAG); - -format: - $(GOFMT) $(SRC_FILES) - -build: format - $(shell cd $(BINARY_PATH) && \ - export GOPATH="$(BINARY_PATH)" && \ - export GOBIN="$(BINARY_PATH)" && \ - CGO_ENABLED=0 GOOS=linux $(GOINSTALL) -ldflags "-s" -a -installsuffix cgo $(MAIN_FILE)) - -docker: build - docker build \ - -t $(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_TAG) .; - -vet: - $(GOVET) . - -push: - docker push \ - $(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_TAG); diff --git a/k8s/mongodb/container/README.md b/k8s/mongodb/container/README.md index baad9f13..9f9c46d1 100644 --- a/k8s/mongodb/container/README.md +++ b/k8s/mongodb/container/README.md @@ -2,7 +2,7 @@ ### Need -* MongoDB needs the hostname provided in the rs.initiate() command to be +* MongoDB needs the hostname provided in the `rs.initiate()` command to be resolvable through the hosts file locally. * In the future, with the introduction of TLS for inter-cluster MongoDB communications, we will need a way to specify detailed configuration. @@ -11,32 +11,52 @@ ### Step 1: Build the Latest Container -`make` from the root of this project. +`docker build -t bigchaindb/mongodb:3.4.4 .` from the root of this project. ### Step 2: Run the Container ``` docker run \ ---name=mdb1 \ ---publish=: \ ---rm=true \ -bigchaindb/mongodb \ ---replica-set-name \ ---fqdn \ ---port + --cap-add=FOWNER \ + --name=mdb1 \ + --publish=: \ + --rm=true \ + --volume=:/data/db \ + --volume=:/data/configdb \ + --volume=:/mongo-ssl:ro \ + bigchaindb/mongodb:3.4.4 \ + --mongodb-port \ + --mongodb-key-file-path /mongo-ssl/.pem \ + --mongodb-key-file-password \ + --mongodb-ca-file-path /mongo-ssl/.crt \ + --mongodb-crl-file-path /mongo-ssl/.pem \ + --replica-set-name \ + --mongodb-fqdn \ + --mongodb-ip ``` #### Step 3: Initialize the Replica Set Login to one of the MongoDB containers, say mdb1: -`docker exec -it mdb1 bash` +`docker exec -it mongodb bash` + +Since we need TLS certificates to use the mongo shell now, copy them using: + +``` +docker cp bdb-instance-0.pem mongodb:/ +docker cp ca.crt mongodb:/ +``` Start the `mongo` shell: -`mongo --port 27017` - +``` +mongo --host mdb1-fqdn --port mdb1-port --verbose --ssl \ + --sslCAFile /ca.crt \ + --sslPEMKeyFile /bdb-instance-0.pem \ + --sslPEMKeyPassword password +``` Run the rs.initiate() command: ``` diff --git a/k8s/mongodb/container/mongod.conf.template b/k8s/mongodb/container/mongod.conf.template index 28e74acf..5b5f5c1f 100644 --- a/k8s/mongodb/container/mongod.conf.template +++ b/k8s/mongodb/container/mongod.conf.template @@ -6,7 +6,7 @@ # where to write logging data. systemLog: verbosity: 0 - #TODO traceAllExceptions: true + # traceAllExceptions: true timeStampFormat: iso8601-utc component: accessControl: @@ -41,7 +41,7 @@ processManagement: pidFilePath: /tmp/mongod.pid net: - port: PORT + port: MONGODB_PORT bindIp: 0.0.0.0 maxIncomingConnections: 8192 wireObjectCheck: false @@ -53,11 +53,24 @@ net: enabled: false compression: compressors: snappy - #ssl: TODO + ssl: + mode: requireSSL + PEMKeyFile: MONGODB_KEY_FILE_PATH + #PEMKeyPassword: MONGODB_KEY_FILE_PASSWORD + CAFile: MONGODB_CA_FILE_PATH + CRLFile: MONGODB_CRL_FILE_PATH + + #allowConnectionsWithoutCertificates: false + #allowInvalidHostnames: false + #weakCertificateValidation: false + #allowInvalidCertificates: false #security: TODO +# authorization: enabled +# clusterAuthMode: x509 -#setParameter: +setParameter: + enableLocalhostAuthBypass: true #notablescan: 1 TODO #logUserIds: 1 TODO @@ -85,5 +98,3 @@ replication: replSetName: REPLICA_SET_NAME enableMajorityReadConcern: true -#sharding: - diff --git a/k8s/mongodb/container/mongod_entrypoint.bash b/k8s/mongodb/container/mongod_entrypoint.bash new file mode 100755 index 00000000..157c92c9 --- /dev/null +++ b/k8s/mongodb/container/mongod_entrypoint.bash @@ -0,0 +1,91 @@ +#!/bin/bash +set -euo pipefail + +MONGODB_PORT="" +MONGODB_KEY_FILE_PATH="" +#MONGODB_KEY_FILE_PASSWORD="" +MONGODB_CA_FILE_PATH="" +MONGODB_CRL_FILE_PATH="" +REPLICA_SET_NAME="" +MONGODB_FQDN="" +MONGODB_IP="" + +while [[ $# -gt 1 ]]; do + arg="$1" + case $arg in + --mongodb-port) + MONGODB_PORT="$2" + shift + ;; + --mongodb-key-file-path) + MONGODB_KEY_FILE_PATH="$2" + shift + ;; + --mongodb-key-file-password) + # TODO(Krish) move this to a mapped file later + MONGODB_KEY_FILE_PASSWORD="$2" + shift + ;; + --mongodb-ca-file-path) + MONGODB_CA_FILE_PATH="$2" + shift + ;; + --mongodb-crl-file-path) + MONGODB_CRL_FILE_PATH="$2" + shift + ;; + --replica-set-name) + REPLICA_SET_NAME="$2" + shift + ;; + --mongodb-fqdn) + MONGODB_FQDN="$2" + shift + ;; + --mongodb-ip) + MONGODB_IP="$2" + shift + ;; + *) + echo "Unknown option: $1" + exit 1 + ;; + esac + shift +done + +# sanity checks +if [[ -z "${REPLICA_SET_NAME}" || \ + -z "${MONGODB_PORT}" || \ + -z "${MONGODB_FQDN}" || \ + -z "${MONGODB_IP}" || \ + -z "${MONGODB_KEY_FILE_PATH}" || \ + -z "${MONGODB_CA_FILE_PATH}" || \ + -z "${MONGODB_CRL_FILE_PATH}" ]] ; then + #-z "${MONGODB_KEY_FILE_PASSWORD}" || \ + echo "Empty parameters detected. Exiting!" + exit 2 +fi + +MONGODB_CONF_FILE_PATH=/etc/mongod.conf +HOSTS_FILE_PATH=/etc/hosts + +# configure the mongod.conf file +sed -i "s|MONGODB_PORT|${MONGODB_PORT}|g" ${MONGODB_CONF_FILE_PATH} +sed -i "s|MONGODB_KEY_FILE_PATH|${MONGODB_KEY_FILE_PATH}|g" ${MONGODB_CONF_FILE_PATH} +#sed -i "s|MONGODB_KEY_FILE_PASSWORD|${MONGODB_KEY_FILE_PASSWORD}|g" ${MONGODB_CONF_FILE_PATH} +sed -i "s|MONGODB_CA_FILE_PATH|${MONGODB_CA_FILE_PATH}|g" ${MONGODB_CONF_FILE_PATH} +sed -i "s|MONGODB_CRL_FILE_PATH|${MONGODB_CRL_FILE_PATH}|g" ${MONGODB_CONF_FILE_PATH} +sed -i "s|REPLICA_SET_NAME|${REPLICA_SET_NAME}|g" ${MONGODB_CONF_FILE_PATH} + +# add the hostname and ip to hosts file +echo "${MONGODB_IP} ${MONGODB_FQDN}" >> $HOSTS_FILE_PATH + +# start mongod +echo "INFO: starting mongod..." + +# TODO Uncomment the first exec command and use it instead of the second one +# after https://github.com/docker-library/mongo/issues/172 is resolved. Check +# for other bugs too. +#exec /entrypoint.sh mongod --config ${MONGODB_CONF_FILE_PATH} +exec /usr/bin/mongod --config ${MONGODB_CONF_FILE_PATH} diff --git a/k8s/mongodb/container/mongod_entrypoint/mongod_entrypoint.go b/k8s/mongodb/container/mongod_entrypoint/mongod_entrypoint.go deleted file mode 100644 index 57b48974..00000000 --- a/k8s/mongodb/container/mongod_entrypoint/mongod_entrypoint.go +++ /dev/null @@ -1,154 +0,0 @@ -package main - -import ( - "bytes" - "errors" - "flag" - "fmt" - "io/ioutil" - "log" - "net" - "os" - "regexp" - "syscall" -) - -const ( - mongoConfFilePath string = "/etc/mongod.conf" - mongoConfTemplateFilePath string = "/etc/mongod.conf.template" - hostsFilePath string = "/etc/hosts" -) - -var ( - // Use the same entrypoint as the mongo:3.4.2 image; just supply it with - // the mongod conf file with custom params - mongoStartCmd []string = []string{"/entrypoint.sh", "mongod", "--config", - mongoConfFilePath} -) - -// context struct stores the user input and the constraints for the specified -// input. It also stores the keyword that needs to be replaced in the template -// files. -type context struct { - cliInput string - templateKeyword string - regex string -} - -// sanity function takes the pre-defined constraints and the user inputs as -// arguments and validates user input based on regex matching -func sanity(input map[string]*context, fqdn, ip string) error { - var format *regexp.Regexp - for _, ctx := range input { - format = regexp.MustCompile(ctx.regex) - if format.MatchString(ctx.cliInput) == false { - return errors.New(fmt.Sprintf( - "Invalid value: '%s' for '%s'. Can be '%s'", - ctx.cliInput, - ctx.templateKeyword, - ctx.regex)) - } - } - - format = regexp.MustCompile(`[a-z0-9-.]+`) - if format.MatchString(fqdn) == false { - return errors.New(fmt.Sprintf( - "Invalid value: '%s' for FQDN. Can be '%s'", - fqdn, - format)) - } - - if net.ParseIP(ip) == nil { - return errors.New(fmt.Sprintf( - "Invalid value: '%s' for IPv4. Can be a.b.c.d", - ip)) - } - - return nil -} - -// createFile function takes the pre-defined keywords, user inputs, the -// template file path and the new file path location as parameters, and -// creates a new file at file path with all the keywords replaced by inputs. -func createFile(input map[string]*context, - template string, conf string) error { - // read the template - contents, err := ioutil.ReadFile(template) - if err != nil { - return err - } - // replace - for _, ctx := range input { - contents = bytes.Replace(contents, []byte(ctx.templateKeyword), - []byte(ctx.cliInput), -1) - } - // write - err = ioutil.WriteFile(conf, contents, 0644) - if err != nil { - return err - } - return nil -} - -// updateHostsFile takes the FQDN supplied as input to the container and adds -// an entry to /etc/hosts -func updateHostsFile(ip, fqdn string) error { - fileHandle, err := os.OpenFile(hostsFilePath, os.O_APPEND|os.O_WRONLY, - os.ModeAppend) - if err != nil { - return err - } - defer fileHandle.Close() - // append - _, err = fileHandle.WriteString(fmt.Sprintf("\n%s %s\n", ip, fqdn)) - if err != nil { - return err - } - return nil -} - -func main() { - var fqdn, ip string - input := make(map[string]*context) - - input["replica-set-name"] = &context{} - input["replica-set-name"].regex = `[a-z]+` - input["replica-set-name"].templateKeyword = "REPLICA_SET_NAME" - flag.StringVar(&input["replica-set-name"].cliInput, - "replica-set-name", - "", - "replica set name") - - input["port"] = &context{} - input["port"].regex = `[0-9]{4,5}` - input["port"].templateKeyword = "PORT" - flag.StringVar(&input["port"].cliInput, - "port", - "", - "mongodb port number") - - flag.StringVar(&fqdn, "fqdn", "", "FQDN of the MongoDB instance") - flag.StringVar(&ip, "ip", "", "IPv4 address of the container") - - flag.Parse() - err := sanity(input, fqdn, ip) - if err != nil { - log.Fatal(err) - } - - err = createFile(input, mongoConfTemplateFilePath, mongoConfFilePath) - if err != nil { - log.Fatal(err) - } - - err = updateHostsFile(ip, fqdn) - if err != nil { - log.Fatal(err) - } - - fmt.Printf("Starting Mongod....") - err = syscall.Exec(mongoStartCmd[0], mongoStartCmd[0:], os.Environ()) - if err != nil { - panic(err) - } -} From 5de15c33ec5ca8d1de08aefb01e745c8d47b266f Mon Sep 17 00:00:00 2001 From: Scott Sadler Date: Fri, 19 May 2017 14:05:28 +0200 Subject: [PATCH 38/60] disable threads in web workers --- bigchaindb/__init__.py | 1 - bigchaindb/web/server.py | 7 +++++-- docs/server/source/server-reference/configuration.md | 6 ++---- tests/test_config_utils.py | 1 - 4 files changed, 7 insertions(+), 8 deletions(-) diff --git a/bigchaindb/__init__.py b/bigchaindb/__init__.py index 9c981385..07f35320 100644 --- a/bigchaindb/__init__.py +++ b/bigchaindb/__init__.py @@ -62,7 +62,6 @@ config = { 'loglevel': logging.getLevelName( log_config['handlers']['console']['level']).lower(), 'workers': None, # if none, the value will be cpu_count * 2 + 1 - 'threads': None, # if none, the value will be cpu_count * 2 + 1 }, 'wsserver': { 'host': os.environ.get('BIGCHAINDB_WSSERVER_HOST') or 'localhost', diff --git a/bigchaindb/web/server.py b/bigchaindb/web/server.py index 3c33a33a..0b24bd64 100644 --- a/bigchaindb/web/server.py +++ b/bigchaindb/web/server.py @@ -48,7 +48,7 @@ class StandaloneApplication(gunicorn.app.base.BaseApplication): return self.application -def create_app(*, debug=False, threads=4): +def create_app(*, debug=False, threads=1): """Return an instance of the Flask application. Args: @@ -102,7 +102,10 @@ def create_server(settings): settings['workers'] = (multiprocessing.cpu_count() * 2) + 1 if not settings.get('threads'): - settings['threads'] = (multiprocessing.cpu_count() * 2) + 1 + # Note: Threading is not recommended currently, as the frontend workload + # is largely CPU bound and parallisation across Python threads makes it + # slower. + settings['threads'] = 1 settings['logger_class'] = 'bigchaindb.log.loggers.HttpServerLogger' app = create_app(debug=settings.get('debug', False), diff --git a/docs/server/source/server-reference/configuration.md b/docs/server/source/server-reference/configuration.md index 053ed68b..04739db3 100644 --- a/docs/server/source/server-reference/configuration.md +++ b/docs/server/source/server-reference/configuration.md @@ -142,7 +142,7 @@ If you used `bigchaindb -y configure mongodb` to create a default local config f ``` -## server.bind, server.loglevel, server.workers & server.threads +## server.bind, server.loglevel & server.workers These settings are for the [Gunicorn HTTP server](http://gunicorn.org/), which is used to serve the [HTTP client-server API](../http-client-server-api.html). @@ -152,7 +152,7 @@ These settings are for the [Gunicorn HTTP server](http://gunicorn.org/), which i [Gunicorn's documentation](http://docs.gunicorn.org/en/latest/settings.html#loglevel) for more information. -`server.workers` is [the number of worker processes](http://docs.gunicorn.org/en/stable/settings.html#workers) for handling requests. If `None` (the default), the value will be (cpu_count * 2 + 1). `server.threads` is [the number of threads-per-worker](http://docs.gunicorn.org/en/stable/settings.html#threads) for handling requests. If `None` (the default), the value will be (cpu_count * 2 + 1). The HTTP server will be able to handle `server.workers` * `server.threads` requests simultaneously. +`server.workers` is [the number of worker processes](http://docs.gunicorn.org/en/stable/settings.html#workers) for handling requests. If `None` (the default), the value will be (cpu_count * 2 + 1). If `None` (the default), the value will be 1. The HTTP server will be able to handle `server.workers` requests simultaneously. **Example using environment variables** ```text @@ -168,7 +168,6 @@ export BIGCHAINDB_SERVER_THREADS=5 "bind": "0.0.0.0:9984", "loglevel": "debug", "workers": 5, - "threads": 5 } ``` @@ -178,7 +177,6 @@ export BIGCHAINDB_SERVER_THREADS=5 "bind": "localhost:9984", "loglevel": "info", "workers": null, - "threads": null } ``` diff --git a/tests/test_config_utils.py b/tests/test_config_utils.py index 88c3431e..a2d1f13e 100644 --- a/tests/test_config_utils.py +++ b/tests/test_config_utils.py @@ -209,7 +209,6 @@ def test_autoconfigure_read_both_from_file_and_env(monkeypatch, request): 'loglevel': logging.getLevelName( log_config['handlers']['console']['level']).lower(), 'workers': None, - 'threads': None, }, 'wsserver': { 'host': WSSERVER_HOST, From ba40034362df9f2910a8c3755e05334ef3339dc8 Mon Sep 17 00:00:00 2001 From: Sylvain Bellemare Date: Tue, 16 May 2017 12:07:04 +0200 Subject: [PATCH 39/60] Update changelog for v0.10.2 --- CHANGELOG.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3db903e3..4bfbd8dd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,20 @@ For reference, the possible headings are: * **External Contributors** to list contributors outside of BigchainDB GmbH. * **Notes** +## [0.10.2] - 2017-05-16 +Tag name: v0.10.2 + +## Added +* Add Cross Origin Resource Sharing (CORS) support for the HTTP API. + [Commit 6cb7596](https://github.com/bigchaindb/bigchaindb/commit/6cb75960b05403c77bdae0fd327612482589efcb) + +## Fixed +* Fixed `streams_v1` API link in response to `GET /api/v1`. + [Pull Request #1466](https://github.com/bigchaindb/bigchaindb/pull/1466) +* Fixed mismatch between docs and implementation for `GET /blocks?status=` + endpoint. The `status` query parameter is now case insensitive. + [Pull Request #1464](https://github.com/bigchaindb/bigchaindb/pull/1464) + ## [0.10.1] - 2017-04-19 Tag name: v0.10.1 From 2589e16fd648734dd275dd659418b4883fb114d0 Mon Sep 17 00:00:00 2001 From: Scott Sadler Date: Mon, 22 May 2017 12:41:15 +0200 Subject: [PATCH 40/60] pipeline fixes; dont validate tx schema during block creation, parallelise tx schema validation in vote pipeline --- bigchaindb/models.py | 25 +++++++++++++++++--- bigchaindb/pipelines/block.py | 15 ++++-------- bigchaindb/pipelines/vote.py | 32 ++++++++++++++------------ tests/pipelines/test_block_creation.py | 13 +++++------ tests/pipelines/test_vote.py | 21 ++++++++--------- 5 files changed, 59 insertions(+), 47 deletions(-) diff --git a/bigchaindb/models.py b/bigchaindb/models.py index 2f46ba20..9f13a131 100644 --- a/bigchaindb/models.py +++ b/bigchaindb/models.py @@ -240,11 +240,12 @@ class Block(object): return False @classmethod - def from_dict(cls, block_body): + def from_dict(cls, block_body, tx_construct=Transaction.from_dict): """Transform a Python dictionary to a Block object. Args: block_body (dict): A block dictionary to be transformed. + tx_class (class): Transaction class to use Returns: :class:`~Block` @@ -261,8 +262,7 @@ class Block(object): if block_id != block_body['id']: raise InvalidHash() - transactions = [Transaction.from_dict(tx) for tx - in block['transactions']] + transactions = [tx_construct(tx) for tx in block['transactions']] signature = block_body.get('signature') @@ -302,3 +302,22 @@ class Block(object): def to_str(self): return serialize(self.to_dict()) + + +class FastTransaction: + """ + A minimal wrapper around a transaction dictionary. This is useful for + when validation is not required but a routine expects something that looks + like a transaction, for example during block creation. + + Note: immutability could also be provided + """ + def __init__(self, tx_dict): + self.data = tx_dict + + @property + def id(self): + return self.data['id'] + + def to_dict(self): + return self.data diff --git a/bigchaindb/pipelines/block.py b/bigchaindb/pipelines/block.py index 0fe327bb..40e5afe8 100644 --- a/bigchaindb/pipelines/block.py +++ b/bigchaindb/pipelines/block.py @@ -12,7 +12,7 @@ from multipipes import Pipeline, Node, Pipe import bigchaindb from bigchaindb import backend from bigchaindb.backend.changefeed import ChangeFeed -from bigchaindb.models import Transaction +from bigchaindb.models import FastTransaction from bigchaindb.common.exceptions import ValidationError from bigchaindb import Bigchain @@ -57,11 +57,11 @@ class BlockPipeline: tx (dict): the transaction to validate. Returns: - :class:`~bigchaindb.models.Transaction`: The transaction if valid, + :class:`~bigchaindb.models.FastTransaction`: The transaction if valid, ``None`` otherwise. """ try: - tx = Transaction.from_dict(tx) + tx = FastTransaction(tx) except ValidationError: return None @@ -71,14 +71,7 @@ class BlockPipeline: self.bigchain.delete_transaction(tx.id) return None - # If transaction is not valid it should not be included - try: - tx.validate(self.bigchain) - return tx - except ValidationError as e: - logger.warning('Invalid tx: %s', e) - self.bigchain.delete_transaction(tx.id) - return None + return tx def create(self, tx, timeout=False): """Create a block. diff --git a/bigchaindb/pipelines/vote.py b/bigchaindb/pipelines/vote.py index 9664c520..dc516845 100644 --- a/bigchaindb/pipelines/vote.py +++ b/bigchaindb/pipelines/vote.py @@ -14,7 +14,7 @@ import bigchaindb from bigchaindb import Bigchain from bigchaindb import backend from bigchaindb.backend.changefeed import ChangeFeed -from bigchaindb.models import Transaction, Block +from bigchaindb.models import Transaction, Block, FastTransaction from bigchaindb.common import exceptions @@ -44,20 +44,21 @@ class Vote: self.counters = Counter() self.validity = {} - self.invalid_dummy_tx = Transaction.create([self.bigchain.me], - [([self.bigchain.me], 1)]) + dummy_tx = Transaction.create([self.bigchain.me], + [([self.bigchain.me], 1)]).to_dict() + self.invalid_dummy_tx = dummy_tx - def validate_block(self, block): - if not self.bigchain.has_previous_vote(block['id']): + def validate_block(self, block_dict): + if not self.bigchain.has_previous_vote(block_dict['id']): try: - block = Block.from_dict(block) + block = Block.from_dict(block_dict, tx_construct=FastTransaction) except (exceptions.InvalidHash): # XXX: if a block is invalid we should skip the `validate_tx` # step, but since we are in a pipeline we cannot just jump to # another function. Hackish solution: generate an invalid # transaction and propagate it to the next steps of the # pipeline. - return block['id'], [self.invalid_dummy_tx] + return block_dict['id'], [self.invalid_dummy_tx] try: block._validate_block(self.bigchain) except exceptions.ValidationError: @@ -67,14 +68,14 @@ class Vote: # transaction and propagate it to the next steps of the # pipeline. return block.id, [self.invalid_dummy_tx] - return block.id, block.transactions + return block.id, block_dict['block']['transactions'] def ungroup(self, block_id, transactions): """Given a block, ungroup the transactions in it. Args: block_id (str): the id of the block in progress. - transactions (list(Transaction)): transactions of the block in + transactions (list(dict)): transactions of the block in progress. Returns: @@ -87,12 +88,12 @@ class Vote: for tx in transactions: yield tx, block_id, num_tx - def validate_tx(self, tx, block_id, num_tx): + def validate_tx(self, tx_dict, block_id, num_tx): """Validate a transaction. Transaction must also not be in any VALID block. Args: - tx (dict): the transaction to validate + tx_dict (dict): the transaction to validate block_id (str): the id of block containing the transaction num_tx (int): the total number of transactions to process @@ -100,16 +101,17 @@ class Vote: Three values are returned, the validity of the transaction, ``block_id``, ``num_tx``. """ - new = self.bigchain.is_new_transaction(tx.id, exclude_block_id=block_id) - if not new: - return False, block_id, num_tx try: + tx = Transaction.from_dict(tx_dict) + new = self.bigchain.is_new_transaction(tx.id, exclude_block_id=block_id) + if not new: + raise exceptions.ValidationError('Tx already exists, %s', tx.id) tx.validate(self.bigchain) valid = True except exceptions.ValidationError as e: - logger.warning('Invalid tx: %s', e) valid = False + logger.warning('Invalid tx: %s', e) return valid, block_id, num_tx diff --git a/tests/pipelines/test_block_creation.py b/tests/pipelines/test_block_creation.py index 27efc65d..ffd423e3 100644 --- a/tests/pipelines/test_block_creation.py +++ b/tests/pipelines/test_block_creation.py @@ -28,15 +28,14 @@ def test_filter_by_assignee(b, signed_create_tx): @pytest.mark.bdb -def test_validate_transaction(b, create_tx): +def test_validate_transaction(b): from bigchaindb.pipelines.block import BlockPipeline + # validate_tx doesn't actually validate schema anymore. + tx = {'id': 'a'} + block_maker = BlockPipeline() - - assert block_maker.validate_tx(create_tx.to_dict()) is None - - valid_tx = create_tx.sign([b.me_private]) - assert block_maker.validate_tx(valid_tx.to_dict()) == valid_tx + assert block_maker.validate_tx(tx).data == tx def test_validate_transaction_handles_exceptions(b, signed_create_tx): @@ -50,7 +49,7 @@ def test_validate_transaction_handles_exceptions(b, signed_create_tx): tx_dict = signed_create_tx.to_dict() - with patch('bigchaindb.models.Transaction.validate') as validate: + with patch('bigchaindb.models.FastTransaction.__init__') as validate: # Assert that validationerror gets caught validate.side_effect = ValidationError() assert block_maker.validate_tx(tx_dict) is None diff --git a/tests/pipelines/test_vote.py b/tests/pipelines/test_vote.py index 7df7ca11..58c0eae4 100644 --- a/tests/pipelines/test_vote.py +++ b/tests/pipelines/test_vote.py @@ -84,7 +84,7 @@ def test_vote_validate_block(b): validation = vote_obj.validate_block(block.to_dict()) assert validation[0] == block.id for tx1, tx2 in zip(validation[1], block.transactions): - assert tx1 == tx2 + assert tx1 == tx2.to_dict() block = b.create_block([tx]) # NOTE: Setting a blocks signature to `None` invalidates it. @@ -142,7 +142,7 @@ def test_vote_validate_transaction(b): from bigchaindb.pipelines import vote from bigchaindb.common.exceptions import ValidationError - tx = dummy_tx(b) + tx = dummy_tx(b).to_dict() vote_obj = vote.Vote() validation = vote_obj.validate_tx(tx, 123, 1) assert validation == (True, 123, 1) @@ -165,15 +165,13 @@ def test_vote_accumulates_transactions(b): vote_obj = vote.Vote() - for _ in range(10): - tx = dummy_tx(b) + tx = dummy_tx(b) - tx = tx - validation = vote_obj.validate_tx(tx, 123, 1) + validation = vote_obj.validate_tx(tx.to_dict(), 123, 1) assert validation == (True, 123, 1) tx.inputs[0].fulfillment.signature = None - validation = vote_obj.validate_tx(tx, 456, 10) + validation = vote_obj.validate_tx(tx.to_dict(), 456, 10) assert validation == (False, 456, 10) @@ -185,16 +183,17 @@ def test_valid_block_voting_sequential(b, genesis_block, monkeypatch): monkeypatch.setattr('time.time', lambda: 1111111111) vote_obj = vote.Vote() - block = dummy_block(b) + block = dummy_block(b).to_dict() + txs = block['block']['transactions'] - for tx, block_id, num_tx in vote_obj.ungroup(block.id, block.transactions): + for tx, block_id, num_tx in vote_obj.ungroup(block['id'], txs): last_vote = vote_obj.vote(*vote_obj.validate_tx(tx, block_id, num_tx)) vote_obj.write_vote(last_vote) vote_rs = query.get_votes_by_block_id_and_voter(b.connection, block_id, b.me) vote_doc = vote_rs.next() - assert vote_doc['vote'] == {'voting_for_block': block.id, + assert vote_doc['vote'] == {'voting_for_block': block['id'], 'previous_block': genesis_block.id, 'is_block_valid': True, 'invalid_reason': None, @@ -655,7 +654,7 @@ def test_vote_no_double_inclusion(b): tx = dummy_tx(b) block = b.create_block([tx]) - r = vote.Vote().validate_tx(tx, block.id, 1) + r = vote.Vote().validate_tx(tx.to_dict(), block.id, 1) assert r == (True, block.id, 1) b.write_block(block) From b6ae91c541217a66d0a0c5a2299685c12f60749a Mon Sep 17 00:00:00 2001 From: Scott Sadler Date: Mon, 22 May 2017 12:47:07 +0200 Subject: [PATCH 41/60] documentation fix --- bigchaindb/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigchaindb/models.py b/bigchaindb/models.py index 9f13a131..66c267a9 100644 --- a/bigchaindb/models.py +++ b/bigchaindb/models.py @@ -245,7 +245,7 @@ class Block(object): Args: block_body (dict): A block dictionary to be transformed. - tx_class (class): Transaction class to use + tx_construct (functions): Function to instantiate Transaction instance Returns: :class:`~Block` From 954df20dac4a42b90388fbfda71bdd8beeaa4cf0 Mon Sep 17 00:00:00 2001 From: Scott Sadler Date: Mon, 22 May 2017 12:50:51 +0200 Subject: [PATCH 42/60] documentation fix --- docs/server/source/server-reference/configuration.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/server/source/server-reference/configuration.md b/docs/server/source/server-reference/configuration.md index 04739db3..2af4cbb1 100644 --- a/docs/server/source/server-reference/configuration.md +++ b/docs/server/source/server-reference/configuration.md @@ -152,7 +152,7 @@ These settings are for the [Gunicorn HTTP server](http://gunicorn.org/), which i [Gunicorn's documentation](http://docs.gunicorn.org/en/latest/settings.html#loglevel) for more information. -`server.workers` is [the number of worker processes](http://docs.gunicorn.org/en/stable/settings.html#workers) for handling requests. If `None` (the default), the value will be (cpu_count * 2 + 1). If `None` (the default), the value will be 1. The HTTP server will be able to handle `server.workers` requests simultaneously. +`server.workers` is [the number of worker processes](http://docs.gunicorn.org/en/stable/settings.html#workers) for handling requests. If `None` (the default), the value will be (cpu_count * 2 + 1). Each worker process has a single thread. The HTTP server will be able to handle `server.workers` requests simultaneously. **Example using environment variables** ```text From ccb4bfd24414a9f0ee1e2b2f70c316ecb50d155c Mon Sep 17 00:00:00 2001 From: Scott Sadler Date: Mon, 22 May 2017 14:22:12 +0200 Subject: [PATCH 43/60] document get_new_blocks_feed --- bigchaindb/backend/query.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/bigchaindb/backend/query.py b/bigchaindb/backend/query.py index 8050c415..458b68be 100644 --- a/bigchaindb/backend/query.py +++ b/bigchaindb/backend/query.py @@ -288,4 +288,14 @@ def get_txids_filtered(connection, asset_id, operation=None): @singledispatch def get_new_blocks_feed(connection, start_block_id): + """ + Return a generator that yields change events of the blocks feed + + Args: + start_block_id (str): ID of block to resume from + + Returns: + Generator of change events + """ + raise NotImplementedError From 710fde686d3ecaf186c95785a29f952613a54f0a Mon Sep 17 00:00:00 2001 From: Krish Date: Wed, 17 May 2017 10:12:56 +0200 Subject: [PATCH 44/60] Update bigchaindb default deployment version Updated to the latest release, version 0.10.2. --- k8s/bigchaindb/bigchaindb-dep.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/k8s/bigchaindb/bigchaindb-dep.yaml b/k8s/bigchaindb/bigchaindb-dep.yaml index 1e8ca56d..140ef50e 100644 --- a/k8s/bigchaindb/bigchaindb-dep.yaml +++ b/k8s/bigchaindb/bigchaindb-dep.yaml @@ -18,7 +18,7 @@ spec: terminationGracePeriodSeconds: 10 containers: - name: bigchaindb - image: bigchaindb/bigchaindb:0.10.1 + image: bigchaindb/bigchaindb:0.10.2 imagePullPolicy: IfNotPresent args: - start From e12e95d9d8856cdc1e7a5f206b06e03500ca351c Mon Sep 17 00:00:00 2001 From: Scott Sadler Date: Tue, 23 May 2017 13:19:10 +0200 Subject: [PATCH 45/60] use rapidjson-schema --- bigchaindb/common/schema/__init__.py | 28 ++++++++++++++++++++++++---- setup.py | 1 + 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/bigchaindb/common/schema/__init__.py b/bigchaindb/common/schema/__init__.py index a69793ad..cc4bfb9d 100644 --- a/bigchaindb/common/schema/__init__.py +++ b/bigchaindb/common/schema/__init__.py @@ -3,6 +3,8 @@ import os.path import jsonschema import yaml +import rapidjson +import rapidjson_schema from bigchaindb.common.exceptions import SchemaValidationError @@ -25,7 +27,8 @@ def _load_schema(name): with open(path) as handle: schema = yaml.safe_load(handle) drop_schema_descriptions(schema) - return path, schema + fast_schema = rapidjson_schema.loads(rapidjson.dumps(schema)) + return path, (schema, fast_schema) TX_SCHEMA_PATH, TX_SCHEMA_COMMON = _load_schema('transaction') @@ -36,10 +39,27 @@ VOTE_SCHEMA_PATH, VOTE_SCHEMA = _load_schema('vote') def _validate_schema(schema, body): """ Validate data against a schema """ + + # Note + # + # Schema validation is currently the major CPU bottleneck of + # BigchainDB. the `jsonschema` library validates python data structures + # directly and produces nice error messages, but validation takes 4+ ms + # per transaction which is pretty slow. The rapidjson library validates + # much faster at 1.5ms, however it produces _very_ poor error messages. + # For this reason we use both, rapidjson as an optimistic pathway and + # jsonschema as a fallback in case there is a failure, so we can produce + # a helpful error message. + try: - jsonschema.validate(body, schema) - except jsonschema.ValidationError as exc: - raise SchemaValidationError(str(exc)) from exc + schema[1].validate(rapidjson.dumps(body)) + except ValueError as exc: + try: + jsonschema.validate(body, schema[0]) + except jsonschema.ValidationError as exc2: + raise SchemaValidationError(str(exc2)) from exc2 + raise Exception('jsonschema did not raise an exception, wheras rapidjson raised', + exc) def validate_transaction_schema(tx): diff --git a/setup.py b/setup.py index 55543ea3..4fd485c0 100644 --- a/setup.py +++ b/setup.py @@ -79,6 +79,7 @@ install_requires = [ 'jsonschema~=2.5.1', 'pyyaml~=3.12', 'aiohttp~=2.0', + 'python-rapidjson-schema==0.1.1', ] setup( From 6900e864584f89dd8f07132a45f801b16be4bc10 Mon Sep 17 00:00:00 2001 From: Rodolphe Marques Date: Wed, 24 May 2017 11:38:15 +0200 Subject: [PATCH 46/60] Filter out assets from invalid transactions - Created docstrings - Created tests - Raise an exception when trying to use text search with RethinkDB. --- bigchaindb/backend/mongodb/query.py | 2 +- bigchaindb/backend/query.py | 28 ++++++++-- bigchaindb/core.py | 11 ++++ tests/db/test_bigchain_api.py | 83 +++++++++++++++++++++++++++++ 4 files changed, 120 insertions(+), 4 deletions(-) diff --git a/bigchaindb/backend/mongodb/query.py b/bigchaindb/backend/mongodb/query.py index 3d989a3d..ffba23be 100644 --- a/bigchaindb/backend/mongodb/query.py +++ b/bigchaindb/backend/mongodb/query.py @@ -330,7 +330,7 @@ def get_unvoted_blocks(conn, node_pubkey): @register_query(MongoDBConnection) -def text_search(conn, search, language='english', case_sensitive=False, +def text_search(conn, search, *, language='english', case_sensitive=False, diacritic_sensitive=False, text_score=False, limit=0): cursor = conn.run( conn.collection('assets') diff --git a/bigchaindb/backend/query.py b/bigchaindb/backend/query.py index 705b0306..83179c2d 100644 --- a/bigchaindb/backend/query.py +++ b/bigchaindb/backend/query.py @@ -2,6 +2,8 @@ from functools import singledispatch +from bigchaindb.backend.exceptions import OperationError + @singledispatch def write_transaction(connection, signed_transaction): @@ -328,8 +330,28 @@ def get_txids_filtered(connection, asset_id, operation=None): @singledispatch -def text_search(conn, search, language='english', case_sensitive=False, +def text_search(conn, search, *, language='english', case_sensitive=False, diacritic_sensitive=False, text_score=False, limit=0): - # TODO: docstring + """Return all the assets that match the text search. - raise NotImplementedError + The results are sorted by text score. + + Args: + search (str): Text search string to query the text index + language (str, optional): The language for the search and the rules for + stemmer and tokenizer. If the language is `None` text search uses + simple tokenization and no stemming. + case_sensitive (bool, optional): Enable or disable case sensitive + search. + diacritic_sensitive (bool, optional): Enable or disable case sensitive + diacritic search. + text_score (bool, optional): If `True` returns the text score with + each document. + limit (int, optional): Limit the number of returned documents. + + Returns: + :obj:`list` of :obj:`dict`: a list of assets + """ + + raise OperationError('This query is only supported when running ' + 'BigchainDB with MongoDB as the backend.') diff --git a/bigchaindb/core.py b/bigchaindb/core.py index 1cd21222..42589b36 100644 --- a/bigchaindb/core.py +++ b/bigchaindb/core.py @@ -662,3 +662,14 @@ class Bigchain(object): the database. """ return backend.query.write_assets(self.connection, assets) + + def text_search(self, search, *, limit=0): + assets = backend.query.text_search(self.connection, search, limit=limit) + + # TODO: This is not efficient. There may be a more efficient way to + # query by storing block ids with the assets and using fastquery. + # See https://github.com/bigchaindb/bigchaindb/issues/1496 + for asset in assets: + tx, status = self.get_transaction(asset['id'], True) + if status == self.TX_VALID: + yield asset diff --git a/tests/db/test_bigchain_api.py b/tests/db/test_bigchain_api.py index 5960f171..339c565d 100644 --- a/tests/db/test_bigchain_api.py +++ b/tests/db/test_bigchain_api.py @@ -213,6 +213,89 @@ class TestBigchainApi(object): assert b.get_transaction(tx1.id) is None assert b.get_transaction(tx2.id) == tx2 + @pytest.mark.genesis + def test_text_search(self, b): + from bigchaindb.models import Transaction + from bigchaindb.backend.exceptions import OperationError + from bigchaindb.backend.mongodb.connection import MongoDBConnection + + # define the assets + asset1 = {'msg': 'BigchainDB 1'} + asset2 = {'msg': 'BigchainDB 2'} + asset3 = {'msg': 'BigchainDB 3'} + + # create the transactions + tx1 = Transaction.create([b.me], [([b.me], 1)], + asset=asset1).sign([b.me_private]) + tx2 = Transaction.create([b.me], [([b.me], 1)], + asset=asset2).sign([b.me_private]) + tx3 = Transaction.create([b.me], [([b.me], 1)], + asset=asset3).sign([b.me_private]) + + # create the block + block = b.create_block([tx1, tx2, tx3]) + b.write_block(block) + + # vote valid + vote = b.vote(block.id, b.get_last_voted_block().id, True) + b.write_vote(vote) + + # get the assets through text search + # this query only works with MongoDB + try: + assets = list(b.text_search('bigchaindb')) + except OperationError as exc: + assert not isinstance(b.connection, MongoDBConnection) + return + + assert len(assets) == 3 + + @pytest.mark.genesis + def test_text_search_returns_valid_only(self, monkeypatch, b): + from bigchaindb.models import Transaction + from bigchaindb.backend.exceptions import OperationError + from bigchaindb.backend.mongodb.connection import MongoDBConnection + + asset_valid = {'msg': 'Hello BigchainDB!'} + asset_invalid = {'msg': 'Goodbye BigchainDB!'} + + monkeypatch.setattr('time.time', lambda: 1000000000) + tx1 = Transaction.create([b.me], [([b.me], 1)], + asset=asset_valid) + tx1 = tx1.sign([b.me_private]) + block1 = b.create_block([tx1]) + b.write_block(block1) + + monkeypatch.setattr('time.time', lambda: 1000000020) + tx2 = Transaction.create([b.me], [([b.me], 1)], + asset=asset_invalid) + tx2 = tx2.sign([b.me_private]) + block2 = b.create_block([tx2]) + b.write_block(block2) + + # vote the first block valid + vote = b.vote(block1.id, b.get_last_voted_block().id, True) + b.write_vote(vote) + + # vote the second block invalid + vote = b.vote(block2.id, b.get_last_voted_block().id, False) + b.write_vote(vote) + + # get assets with text search + try: + assets = list(b.text_search('bigchaindb')) + except OperationError: + assert not isinstance(b.connection, MongoDBConnection) + return + + # should only return one asset + assert len(assets) == 1 + # should return the asset created by tx1 + assert assets[0] == { + 'data': {'msg': 'Hello BigchainDB!'}, + 'id': tx1.id + } + @pytest.mark.usefixtures('inputs') def test_write_transaction(self, b, user_pk, user_sk): from bigchaindb import Bigchain From 17cf9178cee27007a883c7bfe27cba52766f00a7 Mon Sep 17 00:00:00 2001 From: Scott Sadler Date: Wed, 24 May 2017 11:38:16 +0200 Subject: [PATCH 47/60] restore tx validation in block --- bigchaindb/pipelines/block.py | 15 +++++++++++---- tests/pipelines/test_block_creation.py | 13 +++++++------ 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/bigchaindb/pipelines/block.py b/bigchaindb/pipelines/block.py index 40e5afe8..0fe327bb 100644 --- a/bigchaindb/pipelines/block.py +++ b/bigchaindb/pipelines/block.py @@ -12,7 +12,7 @@ from multipipes import Pipeline, Node, Pipe import bigchaindb from bigchaindb import backend from bigchaindb.backend.changefeed import ChangeFeed -from bigchaindb.models import FastTransaction +from bigchaindb.models import Transaction from bigchaindb.common.exceptions import ValidationError from bigchaindb import Bigchain @@ -57,11 +57,11 @@ class BlockPipeline: tx (dict): the transaction to validate. Returns: - :class:`~bigchaindb.models.FastTransaction`: The transaction if valid, + :class:`~bigchaindb.models.Transaction`: The transaction if valid, ``None`` otherwise. """ try: - tx = FastTransaction(tx) + tx = Transaction.from_dict(tx) except ValidationError: return None @@ -71,7 +71,14 @@ class BlockPipeline: self.bigchain.delete_transaction(tx.id) return None - return tx + # If transaction is not valid it should not be included + try: + tx.validate(self.bigchain) + return tx + except ValidationError as e: + logger.warning('Invalid tx: %s', e) + self.bigchain.delete_transaction(tx.id) + return None def create(self, tx, timeout=False): """Create a block. diff --git a/tests/pipelines/test_block_creation.py b/tests/pipelines/test_block_creation.py index ffd423e3..27efc65d 100644 --- a/tests/pipelines/test_block_creation.py +++ b/tests/pipelines/test_block_creation.py @@ -28,14 +28,15 @@ def test_filter_by_assignee(b, signed_create_tx): @pytest.mark.bdb -def test_validate_transaction(b): +def test_validate_transaction(b, create_tx): from bigchaindb.pipelines.block import BlockPipeline - # validate_tx doesn't actually validate schema anymore. - tx = {'id': 'a'} - block_maker = BlockPipeline() - assert block_maker.validate_tx(tx).data == tx + + assert block_maker.validate_tx(create_tx.to_dict()) is None + + valid_tx = create_tx.sign([b.me_private]) + assert block_maker.validate_tx(valid_tx.to_dict()) == valid_tx def test_validate_transaction_handles_exceptions(b, signed_create_tx): @@ -49,7 +50,7 @@ def test_validate_transaction_handles_exceptions(b, signed_create_tx): tx_dict = signed_create_tx.to_dict() - with patch('bigchaindb.models.FastTransaction.__init__') as validate: + with patch('bigchaindb.models.Transaction.validate') as validate: # Assert that validationerror gets caught validate.side_effect = ValidationError() assert block_maker.validate_tx(tx_dict) is None From 56379e9bec27e0a9e98ebc564c80f61e8c36c3c7 Mon Sep 17 00:00:00 2001 From: Rodolphe Marques Date: Wed, 24 May 2017 12:11:40 +0200 Subject: [PATCH 48/60] fix pep8 issue --- tests/backend/mongodb/test_queries.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/backend/mongodb/test_queries.py b/tests/backend/mongodb/test_queries.py index a536525b..897a0f06 100644 --- a/tests/backend/mongodb/test_queries.py +++ b/tests/backend/mongodb/test_queries.py @@ -596,4 +596,4 @@ def test_text_search(): assert list(query.text_search(conn, 'coffee', limit=2)) == [ {'id': 1, 'subject': 'coffee', 'author': 'xyz', 'views': 50}, {'id': 2, 'subject': 'Coffee Shopping', 'author': 'efg', 'views': 5}, - ] \ No newline at end of file + ] From cda8259e5704b0d03568d602d7f039c5219bf518 Mon Sep 17 00:00:00 2001 From: Rodolphe Marques Date: Wed, 24 May 2017 12:38:34 +0200 Subject: [PATCH 49/60] Fixed typos --- bigchaindb/backend/query.py | 4 ++-- bigchaindb/core.py | 2 +- bigchaindb/models.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/bigchaindb/backend/query.py b/bigchaindb/backend/query.py index 8245fb3d..9b2197a5 100644 --- a/bigchaindb/backend/query.py +++ b/bigchaindb/backend/query.py @@ -229,7 +229,7 @@ def get_assets(connection, asset_ids): """Get a list of assets from the assets table. Args: - asset_ids (list): a of list of ids for the assets to be retrieved from + asset_ids (list): a list of ids for the assets to be retrieved from the database. Returns: @@ -293,7 +293,7 @@ def get_last_voted_block_id(connection, node_pubkey): node_pubkey (str): base58 encoded public key. Returns: - The last block id the node has voted on. If the node didn't cast + The id of the last block the node has voted on. If the node didn't cast any vote then the genesis block id is returned. """ diff --git a/bigchaindb/core.py b/bigchaindb/core.py index 1cd21222..b2c8d398 100644 --- a/bigchaindb/core.py +++ b/bigchaindb/core.py @@ -648,7 +648,7 @@ class Bigchain(object): asset_ids (:obj:`list` of :obj:`str`): A list of asset_ids to retrieve from the database. - Returs: + Returns: list: The list of assets returned from the database. """ return backend.query.get_assets(self.connection, asset_ids) diff --git a/bigchaindb/models.py b/bigchaindb/models.py index c77c3338..58743939 100644 --- a/bigchaindb/models.py +++ b/bigchaindb/models.py @@ -106,7 +106,7 @@ class Transaction(Transaction): if tx_dict['operation'] in [Transaction.CREATE, Transaction.GENESIS]: # TODO: Maybe replace this call to a call to get_asset_by_id asset = list(bigchain.get_assets([tx_dict['id']]))[0] - asset.pop('id') + del asset['id'] tx_dict.update({'asset': asset}) return cls.from_dict(tx_dict) From ae564974561e4de29d8469252aff69ef073432bd Mon Sep 17 00:00:00 2001 From: Leo Arias Date: Fri, 19 May 2017 15:37:38 -0600 Subject: [PATCH 50/60] set the version of the snap using git tags closes #1436 --- snap/snapcraft.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/snap/snapcraft.yaml b/snap/snapcraft.yaml index aa3a9bca..4fe5618f 100644 --- a/snap/snapcraft.yaml +++ b/snap/snapcraft.yaml @@ -1,5 +1,5 @@ name: bigchaindb -version: master +version: git summary: a scalable blockchain database description: | With high throughput, sub-second latency and powerful functionality to From 4fa0e951b1be95898bda2a673dc9f57e9a390694 Mon Sep 17 00:00:00 2001 From: Scott Sadler Date: Wed, 24 May 2017 16:16:37 +0200 Subject: [PATCH 51/60] fix pipeline changefeed node name --- bigchaindb/pipelines/vote.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigchaindb/pipelines/vote.py b/bigchaindb/pipelines/vote.py index 5140a793..5f747fe8 100644 --- a/bigchaindb/pipelines/vote.py +++ b/bigchaindb/pipelines/vote.py @@ -169,7 +169,7 @@ def get_changefeed(): b = Bigchain() last_block_id = b.get_last_voted_block().id feed = backend.query.get_new_blocks_feed(b.connection, last_block_id) - return Node(feed.__next__) + return Node(feed.__next__, name='changefeed') def start(): From e9979dda85732eb3134fd0a670c9a92ed3c29e25 Mon Sep 17 00:00:00 2001 From: Rodolphe Marques Date: Wed, 24 May 2017 17:05:43 +0200 Subject: [PATCH 52/60] fixed typos --- bigchaindb/models.py | 13 ++++++------- tests/test_block_model.py | 2 +- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/bigchaindb/models.py b/bigchaindb/models.py index 58743939..8ca79b67 100644 --- a/bigchaindb/models.py +++ b/bigchaindb/models.py @@ -351,7 +351,7 @@ class Block(object): def decouple_assets(self): """ - Extracts the assets from the `CREATE` transactions in the block. + Extracts the assets from the ``CREATE`` transactions in the block. Returns: tuple: (assets, block) with the assets being a list of dicts and @@ -372,9 +372,9 @@ class Block(object): @staticmethod def couple_assets(block_dict, assets): """ - Give a block_dict with not assets (as returned from a database call) - and a list of assets, reconstruct the original block by puting the - assets back into the `CREATE` transactions in the block. + Given a block_dict with no assets (as returned from a database call) + and a list of assets, reconstruct the original block by putting the + assets back into the ``CREATE`` transactions in the block. Args: block_dict (:obj:`dict`): The block dict as returned from a @@ -391,15 +391,14 @@ class Block(object): for transaction in block_dict['block']['transactions']: if transaction['operation'] in [Transaction.CREATE, Transaction.GENESIS]: - transaction.update({'asset': assets.get(transaction['id'], - None)}) + transaction.update({'asset': assets.get(transaction['id'])}) return block_dict @staticmethod def get_asset_ids(block_dict): """ Given a block_dict return all the asset_ids for that block (the txid - of CREATE transactions). Usefull to know which assets to retrieve + of CREATE transactions). Useful to know which assets to retrieve from the database to reconstruct the block. Args: diff --git a/tests/test_block_model.py b/tests/test_block_model.py index 0824c40f..6e14d293 100644 --- a/tests/test_block_model.py +++ b/tests/test_block_model.py @@ -220,7 +220,7 @@ class TestBlockModel(object): block_dict_reconstructed = Block.couple_assets(block_dict, assets_from_block) - # check that the reconstructed block is the as the original block + # check that the reconstructed block is the same as the original block assert block == Block.from_dict(block_dict_reconstructed) def test_get_asset_ids(self, b): From 3819ae5d654968e2c6850e4407abe170ac9034a2 Mon Sep 17 00:00:00 2001 From: Rodolphe Marques Date: Fri, 26 May 2017 10:34:58 +0200 Subject: [PATCH 53/60] Clean up code - Fixed docstrings --- bigchaindb/backend/mongodb/query.py | 4 ++-- bigchaindb/backend/query.py | 6 ++++-- tests/db/test_bigchain_api.py | 5 ++--- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/bigchaindb/backend/mongodb/query.py b/bigchaindb/backend/mongodb/query.py index 9b25c0fc..eaef721d 100644 --- a/bigchaindb/backend/mongodb/query.py +++ b/bigchaindb/backend/mongodb/query.py @@ -371,8 +371,8 @@ def text_search(conn, search, *, language='english', case_sensitive=False, if text_score: return cursor - else: - return (_remove_text_score(asset) for asset in cursor) + + return (_remove_text_score(asset) for asset in cursor) def _remove_text_score(asset): diff --git a/bigchaindb/backend/query.py b/bigchaindb/backend/query.py index 07fd3e4c..74879cef 100644 --- a/bigchaindb/backend/query.py +++ b/bigchaindb/backend/query.py @@ -363,17 +363,19 @@ def text_search(conn, search, *, language='english', case_sensitive=False, """Return all the assets that match the text search. The results are sorted by text score. + For more information about the behavior of text search on MongoDB see + https://docs.mongodb.com/manual/reference/operator/query/text/#behavior Args: search (str): Text search string to query the text index language (str, optional): The language for the search and the rules for - stemmer and tokenizer. If the language is `None` text search uses + stemmer and tokenizer. If the language is ``None`` text search uses simple tokenization and no stemming. case_sensitive (bool, optional): Enable or disable case sensitive search. diacritic_sensitive (bool, optional): Enable or disable case sensitive diacritic search. - text_score (bool, optional): If `True` returns the text score with + text_score (bool, optional): If ``True`` returns the text score with each document. limit (int, optional): Limit the number of returned documents. diff --git a/tests/db/test_bigchain_api.py b/tests/db/test_bigchain_api.py index feb24e9c..05b07bf6 100644 --- a/tests/db/test_bigchain_api.py +++ b/tests/db/test_bigchain_api.py @@ -246,9 +246,8 @@ class TestBigchainApi(object): assets = list(b.text_search('bigchaindb')) except OperationError as exc: assert not isinstance(b.connection, MongoDBConnection) - return - - assert len(assets) == 3 + else: + assert len(assets) == 3 @pytest.mark.genesis def test_text_search_returns_valid_only(self, monkeypatch, b): From 444bf6d197675ba9520f0d2c720793649bb35562 Mon Sep 17 00:00:00 2001 From: Scott Sadler Date: Mon, 29 May 2017 16:42:29 +0200 Subject: [PATCH 54/60] fix rethinkdb changefeed decoupling --- bigchaindb/backend/rethinkdb/query.py | 3 ++- tests/pipelines/stepping.py | 3 +-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/bigchaindb/backend/rethinkdb/query.py b/bigchaindb/backend/rethinkdb/query.py index cc6bb96f..7d6f78d0 100644 --- a/bigchaindb/backend/rethinkdb/query.py +++ b/bigchaindb/backend/rethinkdb/query.py @@ -251,4 +251,5 @@ def get_new_blocks_feed(connection, start_block_id): start_block_id) # In order to get blocks in the correct order, it may be acceptable to # look in the votes table to see what order other nodes have used. - return changefeed.run_changefeed(connection, 'bigchain') + for change in changefeed.run_changefeed(connection, 'bigchain'): + yield change['new_val'] diff --git a/tests/pipelines/stepping.py b/tests/pipelines/stepping.py index 36f68a6a..beb0d25e 100644 --- a/tests/pipelines/stepping.py +++ b/tests/pipelines/stepping.py @@ -40,7 +40,6 @@ from contextlib import contextmanager from unittest.mock import patch import bigchaindb.core -from bigchaindb.backend.changefeed import ChangeFeed import bigchaindb.pipelines.block import bigchaindb.pipelines.stale import bigchaindb.pipelines.vote @@ -58,7 +57,7 @@ class MultipipesStepper: name = '%s_%s' % (prefix, node.name) next_name = '%s_%s' % (prefix, next.name) - if isinstance(node, ChangeFeed): + if node.name == 'changefeed': self.processes.append(node) def f(*args, **kwargs): From 8b7f86b63d93d3a05ca6aa4211cfdb8f6eda56dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Daubensch=C3=BCtz?= Date: Tue, 30 May 2017 11:21:26 +0200 Subject: [PATCH 55/60] Feat/1462/text search http api (#1471) * Add assets text search endpoint * Filter out assets from invalid transactions. - Added the limit argument to limit the returned results - Created and updated tests * Added documentation for the assets endpoint. - Added some docstrings * Removed unnecessary fixtures --- bigchaindb/backend/query.py | 3 + bigchaindb/core.py | 10 ++ bigchaindb/web/routes.py | 2 + bigchaindb/web/views/assets.py | 50 ++++++++ docs/server/source/http-client-server-api.rst | 112 ++++++++++++++++++ tests/web/test_assets.py | 83 +++++++++++++ 6 files changed, 260 insertions(+) create mode 100644 bigchaindb/web/views/assets.py create mode 100644 tests/web/test_assets.py diff --git a/bigchaindb/backend/query.py b/bigchaindb/backend/query.py index f989c8b1..8cc5d2c8 100644 --- a/bigchaindb/backend/query.py +++ b/bigchaindb/backend/query.py @@ -381,6 +381,9 @@ def text_search(conn, search, *, language='english', case_sensitive=False, Returns: :obj:`list` of :obj:`dict`: a list of assets + + Raises: + OperationError: If the backend does not support text search """ raise OperationError('This query is only supported when running ' diff --git a/bigchaindb/core.py b/bigchaindb/core.py index 923cb8a8..f6611e7d 100644 --- a/bigchaindb/core.py +++ b/bigchaindb/core.py @@ -621,6 +621,16 @@ class Bigchain(object): return backend.query.write_assets(self.connection, assets) def text_search(self, search, *, limit=0): + """ + Return an iterator of assets that match the text search + + Args: + search (str): Text search string to query the text index + limit (int, optional): Limit the number of returned documents. + + Returns: + iter: An iterator of assets that match the text search. + """ assets = backend.query.text_search(self.connection, search, limit=limit) # TODO: This is not efficient. There may be a more efficient way to diff --git a/bigchaindb/web/routes.py b/bigchaindb/web/routes.py index b20f8d40..d1fa6d26 100644 --- a/bigchaindb/web/routes.py +++ b/bigchaindb/web/routes.py @@ -1,6 +1,7 @@ """ API routes definition """ from flask_restful import Api from bigchaindb.web.views import ( + assets, blocks, info, statuses, @@ -25,6 +26,7 @@ def r(*args, **kwargs): ROUTES_API_V1 = [ r('/', info.ApiV1Index), + r('assets/', assets.AssetListApi), r('blocks/', blocks.BlockApi), r('blocks/', blocks.BlockListApi), r('statuses/', statuses.StatusApi), diff --git a/bigchaindb/web/views/assets.py b/bigchaindb/web/views/assets.py new file mode 100644 index 00000000..7639b78f --- /dev/null +++ b/bigchaindb/web/views/assets.py @@ -0,0 +1,50 @@ +"""This module provides the blueprint for some basic API endpoints. + +For more information please refer to the documentation: http://bigchaindb.com/http-api +""" +import logging + +from flask_restful import reqparse, Resource +from flask import current_app + +from bigchaindb.backend.exceptions import OperationError +from bigchaindb.web.views.base import make_error + +logger = logging.getLogger(__name__) + + +class AssetListApi(Resource): + def get(self): + """API endpoint to perform a text search on the assets. + + Args: + search (str): Text search string to query the text index + limit (int, optional): Limit the number of returned documents. + + Return: + A list of assets that match the query. + """ + parser = reqparse.RequestParser() + parser.add_argument('search', type=str, required=True) + parser.add_argument('limit', type=int) + args = parser.parse_args() + + if not args['search']: + return make_error(400, 'text_search cannot be empty') + if not args['limit']: + # if the limit is not specified do not pass None to `text_search` + del args['limit'] + + pool = current_app.config['bigchain_pool'] + + with pool() as bigchain: + assets = bigchain.text_search(**args) + + try: + # This only works with MongoDB as the backend + return list(assets) + except OperationError as e: + return make_error( + 400, + '({}): {}'.format(type(e).__name__, e) + ) diff --git a/docs/server/source/http-client-server-api.rst b/docs/server/source/http-client-server-api.rst index eb462d25..9fd7aee5 100644 --- a/docs/server/source/http-client-server-api.rst +++ b/docs/server/source/http-client-server-api.rst @@ -270,6 +270,118 @@ Statuses :statuscode 404: A block with that ID was not found. +Assets +-------------------------------- + +.. http:get:: /api/v1/assets + + Return all the assets that match a given text search. + + :query string text search: Text search string to query. + :query int limit: (Optional) Limit the number of returned assets. Defaults + to ``0`` meaning return all matching assets. + + .. note:: + + Currently this enpoint is only supported if the server is running + MongoDB as the backend. + +.. http:get:: /api/v1/assets?search={text_search} + + Return all assets that match a given text search. The asset is returned + with the ``id`` of the transaction that created the asset. + + If no assets match the text search it returns an empty list. + + If the text string is empty or the server does not support text search, + a ``400`` is returned. + + The results are sorted by text score. + For more information about the behavior of text search see `MongoDB text + search behavior `_ + + **Example request**: + + .. sourcecode:: http + + GET /api/v1/assets/?search=bigchaindb HTTP/1.1 + Host: example.com + + **Example response**: + + .. sourcecode:: http + + HTTP/1.1 200 OK + Content-type: application/json + + [ + { + "data": {"msg": "Hello BigchainDB 1!"}, + "id": "51ce82a14ca274d43e4992bbce41f6fdeb755f846e48e710a3bbb3b0cf8e4204" + }, + { + "data": {"msg": "Hello BigchainDB 2!"}, + "id": "b4e9005fa494d20e503d916fa87b74fe61c079afccd6e084260674159795ee31" + }, + { + "data": {"msg": "Hello BigchainDB 3!"}, + "id": "fa6bcb6a8fdea3dc2a860fcdc0e0c63c9cf5b25da8b02a4db4fb6a2d36d27791" + } + ] + + :resheader Content-Type: ``application/json`` + + :statuscode 200: The query was executed successfully. + :statuscode 400: The query was not executed successfully. Returned if the + text string is empty or the server does not support + text search. + +.. http:get:: /api/v1/assets?search={text_search}&limit={n_documents} + + Return at most ``n`` assets that match a given text search. + + If no assets match the text search it returns an empty list. + + If the text string is empty or the server does not support text search, + a ``400`` is returned. + + The results are sorted by text score. + For more information about the behavior of text search see `MongoDB text + search behavior `_ + + **Example request**: + + .. sourcecode:: http + + GET /api/v1/assets/?search=bigchaindb&limit=2 HTTP/1.1 + Host: example.com + + **Example response**: + + .. sourcecode:: http + + HTTP/1.1 200 OK + Content-type: application/json + + [ + { + "data": {"msg": "Hello BigchainDB 1!"}, + "id": "51ce82a14ca274d43e4992bbce41f6fdeb755f846e48e710a3bbb3b0cf8e4204" + }, + { + "data": {"msg": "Hello BigchainDB 2!"}, + "id": "b4e9005fa494d20e503d916fa87b74fe61c079afccd6e084260674159795ee31" + }, + ] + + :resheader Content-Type: ``application/json`` + + :statuscode 200: The query was executed successfully. + :statuscode 400: The query was not executed successfully. Returned if the + text string is empty or the server does not support + text search. + + Advanced Usage -------------------------------- diff --git a/tests/web/test_assets.py b/tests/web/test_assets.py new file mode 100644 index 00000000..0f335ef2 --- /dev/null +++ b/tests/web/test_assets.py @@ -0,0 +1,83 @@ +import pytest + +ASSETS_ENDPOINT = '/api/v1/assets/' + + +def test_get_assets_with_empty_text_search(client): + res = client.get(ASSETS_ENDPOINT + '?search=') + assert res.json == {'status': 400, + 'message': 'text_search cannot be empty'} + assert res.status_code == 400 + + +def test_get_assets_with_missing_text_search(client): + res = client.get(ASSETS_ENDPOINT) + assert res.status_code == 400 + + +@pytest.mark.genesis +def test_get_assets(client, b): + from bigchaindb.models import Transaction + from bigchaindb.backend.mongodb.connection import MongoDBConnection + + if isinstance(b.connection, MongoDBConnection): + # test returns empty list when no assets are found + res = client.get(ASSETS_ENDPOINT + '?search=abc') + assert res.json == [] + assert res.status_code == 200 + + # create asset + asset = {'msg': 'abc'} + tx = Transaction.create([b.me], [([b.me], 1)], + asset=asset).sign([b.me_private]) + # create block + block = b.create_block([tx]) + b.write_block(block) + # vote valid + vote = b.vote(block.id, b.get_last_voted_block().id, True) + b.write_vote(vote) + + # test that asset is returned + res = client.get(ASSETS_ENDPOINT + '?search=abc') + assert res.status_code == 200 + assert len(res.json) == 1 + assert res.json[0] == { + 'data': {'msg': 'abc'}, + 'id': tx.id + } + else: + # test that the correct error is returned if not running MongoDB + res = client.get(ASSETS_ENDPOINT + '?search=abc') + assert res.status_code == 400 + assert res.json['message'].startswith('(OperationError)') + + +@pytest.mark.genesis +def test_get_assets_limit(client, b): + from bigchaindb.models import Transaction + from bigchaindb.backend.mongodb.connection import MongoDBConnection + + if isinstance(b.connection, MongoDBConnection): + # create two assets + asset1 = {'msg': 'abc 1'} + asset2 = {'msg': 'abc 2'} + tx1 = Transaction.create([b.me], [([b.me], 1)], + asset=asset1).sign([b.me_private]) + tx2 = Transaction.create([b.me], [([b.me], 1)], + asset=asset2).sign([b.me_private]) + # create block + block = b.create_block([tx1, tx2]) + b.write_block(block) + # vote valid + vote = b.vote(block.id, b.get_last_voted_block().id, True) + b.write_vote(vote) + + # test that both assets are returned without limit + res = client.get(ASSETS_ENDPOINT + '?search=abc') + assert res.status_code == 200 + assert len(res.json) == 2 + + # test that only one asset is returned when using limit=1 + res = client.get(ASSETS_ENDPOINT + '?search=abc&limit=1') + assert res.status_code == 200 + assert len(res.json) == 1 From 4216c1733984f4f58c6401c8999afef4bff6ecf4 Mon Sep 17 00:00:00 2001 From: Scott Sadler Date: Tue, 30 May 2017 14:41:24 +0200 Subject: [PATCH 56/60] test Vote.validate_tx transaction exists --- tests/pipelines/test_vote.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/pipelines/test_vote.py b/tests/pipelines/test_vote.py index 8ade412e..14830ce5 100644 --- a/tests/pipelines/test_vote.py +++ b/tests/pipelines/test_vote.py @@ -672,3 +672,14 @@ def test_vote_no_double_inclusion(b): b.write_block(block) r = vote.Vote().validate_tx(tx, 'other_block_id', 1) assert r == (False, 'other_block_id', 1) + + +@pytest.mark.genesis +def test_duplicate_transaction(signed_create_tx): + from bigchaindb.pipelines import vote + + with patch('bigchaindb.core.Bigchain.is_new_transaction') as is_new: + is_new.return_value = False + res = vote.Vote().validate_tx(signed_create_tx.to_dict(), 'a', 1) + assert res == (False, 'a', 1) + assert is_new.call_count == 1 From d51632a67582b71eb0be81f02cdc2a62fe73c1be Mon Sep 17 00:00:00 2001 From: Scott Sadler Date: Tue, 30 May 2017 15:14:23 +0200 Subject: [PATCH 57/60] test for vote get_changefeed to make codecov happy --- tests/pipelines/test_vote.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/pipelines/test_vote.py b/tests/pipelines/test_vote.py index c170cd04..7eb00441 100644 --- a/tests/pipelines/test_vote.py +++ b/tests/pipelines/test_vote.py @@ -584,3 +584,14 @@ def test_vote_no_double_inclusion(b): b.write_block(block) r = vote.Vote().validate_tx(tx, 'other_block_id', 1) assert r == (False, 'other_block_id', 1) + + +@pytest.mark.genesis +def test_vote_changefeed(b): + from bigchaindb.pipelines import vote + + changefeed = vote.get_changefeed() + tx = dummy_tx(b) + block = b.create_block([tx]) + b.write_block(block) + assert changefeed.target() == block.decouple_assets()[1] From 8348dfb8cec8aabde9e9a1573167db8f662538a8 Mon Sep 17 00:00:00 2001 From: Scott Sadler Date: Tue, 30 May 2017 17:43:38 +0200 Subject: [PATCH 58/60] test for vote get_changefeed to make codecov happy (fix) --- tests/pipelines/test_vote.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/pipelines/test_vote.py b/tests/pipelines/test_vote.py index 7eb00441..b61b3cb4 100644 --- a/tests/pipelines/test_vote.py +++ b/tests/pipelines/test_vote.py @@ -589,9 +589,13 @@ def test_vote_no_double_inclusion(b): @pytest.mark.genesis def test_vote_changefeed(b): from bigchaindb.pipelines import vote + from multiprocessing import Queue changefeed = vote.get_changefeed() + changefeed.outqueue = Queue() + changefeed.start() tx = dummy_tx(b) block = b.create_block([tx]) b.write_block(block) - assert changefeed.target() == block.decouple_assets()[1] + assert changefeed.outqueue.get() == block.decouple_assets()[1] + changefeed.terminate() From 211bdeffb129474666c53a9bdd6c2e6f4d1addc1 Mon Sep 17 00:00:00 2001 From: Scott Sadler Date: Wed, 31 May 2017 11:33:57 +0200 Subject: [PATCH 59/60] dont raise Exception if there's an inconsistency in schema validation failures --- bigchaindb/common/schema/__init__.py | 8 ++++++-- tests/common/test_schema.py | 7 +++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/bigchaindb/common/schema/__init__.py b/bigchaindb/common/schema/__init__.py index cc4bfb9d..fc048e0b 100644 --- a/bigchaindb/common/schema/__init__.py +++ b/bigchaindb/common/schema/__init__.py @@ -1,5 +1,6 @@ """ Schema validation related functions and data """ import os.path +import logging import jsonschema import yaml @@ -9,6 +10,9 @@ import rapidjson_schema from bigchaindb.common.exceptions import SchemaValidationError +logger = logging.getLogger(__name__) + + def drop_schema_descriptions(node): """ Drop descriptions from schema, since they clutter log output """ if 'description' in node: @@ -58,8 +62,8 @@ def _validate_schema(schema, body): jsonschema.validate(body, schema[0]) except jsonschema.ValidationError as exc2: raise SchemaValidationError(str(exc2)) from exc2 - raise Exception('jsonschema did not raise an exception, wheras rapidjson raised', - exc) + logger.warning('code problem: jsonschema did not raise an exception, wheras rapidjson raised %s', exc) + raise SchemaValidationError(str(exc)) from exc def validate_transaction_schema(tx): diff --git a/tests/common/test_schema.py b/tests/common/test_schema.py index 1db17170..7f2ca397 100644 --- a/tests/common/test_schema.py +++ b/tests/common/test_schema.py @@ -4,6 +4,7 @@ properties related to validation. """ from pytest import raises +from unittest.mock import patch from bigchaindb.common.exceptions import SchemaValidationError from bigchaindb.common.schema import ( @@ -102,6 +103,12 @@ def test_validate_transaction_fails(): validate_transaction_schema({}) +def test_validate_failure_inconsistent(): + with patch('jsonschema.validate'): + with raises(SchemaValidationError): + validate_transaction_schema({}) + + ################################################################################ # Test call vote schema From 01c6d6e72d3535f2493249ccf4e97f43a59d5817 Mon Sep 17 00:00:00 2001 From: Scott Sadler Date: Wed, 31 May 2017 17:13:57 +0200 Subject: [PATCH 60/60] remove test, see if codecov is happy --- bigchaindb/backend/rethinkdb/query.py | 2 +- tests/pipelines/test_vote.py | 15 --------------- 2 files changed, 1 insertion(+), 16 deletions(-) diff --git a/bigchaindb/backend/rethinkdb/query.py b/bigchaindb/backend/rethinkdb/query.py index 3521f0c3..3ee90f34 100644 --- a/bigchaindb/backend/rethinkdb/query.py +++ b/bigchaindb/backend/rethinkdb/query.py @@ -260,7 +260,7 @@ def get_last_voted_block_id(connection, node_pubkey): @register_query(RethinkDBConnection) -def get_new_blocks_feed(connection, start_block_id): +def get_new_blocks_feed(connection, start_block_id): # pragma: no cover logger.warning('RethinkDB changefeed unable to resume from given block: %s', start_block_id) # In order to get blocks in the correct order, it may be acceptable to diff --git a/tests/pipelines/test_vote.py b/tests/pipelines/test_vote.py index b61b3cb4..c170cd04 100644 --- a/tests/pipelines/test_vote.py +++ b/tests/pipelines/test_vote.py @@ -584,18 +584,3 @@ def test_vote_no_double_inclusion(b): b.write_block(block) r = vote.Vote().validate_tx(tx, 'other_block_id', 1) assert r == (False, 'other_block_id', 1) - - -@pytest.mark.genesis -def test_vote_changefeed(b): - from bigchaindb.pipelines import vote - from multiprocessing import Queue - - changefeed = vote.get_changefeed() - changefeed.outqueue = Queue() - changefeed.start() - tx = dummy_tx(b) - block = b.create_block([tx]) - b.write_block(block) - assert changefeed.outqueue.get() == block.decouple_assets()[1] - changefeed.terminate()