From 4f991227586dffe457c1fc04f4f7eba783bdd3a4 Mon Sep 17 00:00:00 2001 From: Scott Sadler Date: Wed, 19 Apr 2017 15:47:58 +0200 Subject: [PATCH 01/21] fast unspents --- bigchaindb/backend/mongodb/query.py | 31 ++++++++++- bigchaindb/backend/query.py | 28 ++++++++++ bigchaindb/common/transaction.py | 3 + bigchaindb/core.py | 11 ++-- bigchaindb/fastquery.py | 56 +++++++++++++++++++ tests/backend/mongodb/test_queries.py | 51 +++++++++++++++++ tests/db/test_bigchain_api.py | 12 ++-- tests/test_fastquery.py | 80 +++++++++++++++++++++++++++ 8 files changed, 261 insertions(+), 11 deletions(-) create mode 100644 bigchaindb/fastquery.py create mode 100644 tests/test_fastquery.py diff --git a/bigchaindb/backend/mongodb/query.py b/bigchaindb/backend/mongodb/query.py index 74b9c35a..d564e242 100644 --- a/bigchaindb/backend/mongodb/query.py +++ b/bigchaindb/backend/mongodb/query.py @@ -176,13 +176,33 @@ def get_spent(conn, transaction_id, output): @register_query(MongoDBConnection) -def get_owned_ids(conn, owner): +def get_spending_transactions(conn, inputs): + return conn.run( + conn.collection('bigchain').aggregate([ + {'$match': { + 'block.transactions.inputs.fulfills': { + '$in': inputs, + }, + }}, + {'$unwind': '$block.transactions'}, + {'$match': { + 'block.transactions.inputs.fulfills': { + '$in': inputs, + }, + }}, + ])) + + +@register_query(MongoDBConnection) +def get_owned_ids(conn, owner, unwrap=True): cursor = conn.run( conn.collection('bigchain').aggregate([ {'$match': {'block.transactions.outputs.public_keys': owner}}, {'$unwind': '$block.transactions'}, {'$match': {'block.transactions.outputs.public_keys': owner}} ])) + if not unwrap: + return cursor # we need to access some nested fields before returning so lets use a # generator to avoid having to read all records on the cursor at this point return (elem['block']['transactions'] for elem in cursor) @@ -196,6 +216,15 @@ def get_votes_by_block_id(conn, block_id): projection={'_id': False})) +@register_query(MongoDBConnection) +def get_votes_for_blocks_by_voter(conn, block_ids, node_pubkey): + return conn.run( + conn.collection('votes') + .find({'vote.voting_for_block': {'$in': block_ids}, + 'node_pubkey': node_pubkey}, + projection={'_id': False})) + + @register_query(MongoDBConnection) def get_votes_by_block_id_and_voter(conn, block_id, node_pubkey): return conn.run( diff --git a/bigchaindb/backend/query.py b/bigchaindb/backend/query.py index 9aa653d7..5ae39557 100644 --- a/bigchaindb/backend/query.py +++ b/bigchaindb/backend/query.py @@ -140,6 +140,19 @@ def get_spent(connection, transaction_id, condition_id): raise NotImplementedError +@singledispatch +def get_spending_transactions(connection, inputs): + """Return transactions which spend given inputs + + Args: + inputs (list): list of {txid, output} + + Returns: + List of transactions that spend given inputs + """ + raise NotImplementedError + + @singledispatch def get_owned_ids(connection, owner): """Retrieve a list of `txids` that can we used has inputs. @@ -183,6 +196,21 @@ def get_votes_by_block_id_and_voter(connection, block_id, node_pubkey): raise NotImplementedError +@singledispatch +def get_votes_for_blocks_by_voter(connection, block_ids, pubkey): + """Return votes for many block_ids + + Args: + block_ids (set): block_ids + pubkey (str): public key of voting node + + Returns: + A cursor of votes matching given votes. + """ + + raise NotImplementedError + + @singledispatch def write_block(connection, block): """Write a block to the bigchain table. diff --git a/bigchaindb/common/transaction.py b/bigchaindb/common/transaction.py index e956812f..7d17172a 100644 --- a/bigchaindb/common/transaction.py +++ b/bigchaindb/common/transaction.py @@ -161,6 +161,9 @@ class TransactionLink(object): # TODO: If `other !== TransactionLink` return `False` return self.to_dict() == other.to_dict() + def __hash__(self): + return hash((self.txid, self.output)) + @classmethod def from_dict(cls, link): """Transforms a Python dictionary to a TransactionLink object. diff --git a/bigchaindb/core.py b/bigchaindb/core.py index 5d2e9c03..be7cdcbc 100644 --- a/bigchaindb/core.py +++ b/bigchaindb/core.py @@ -8,7 +8,7 @@ from bigchaindb.common.transaction import TransactionLink import bigchaindb -from bigchaindb import backend, config_utils, utils +from bigchaindb import backend, config_utils, utils, fastquery from bigchaindb.consensus import BaseConsensusRules from bigchaindb.models import Block, Transaction @@ -433,14 +433,17 @@ class Bigchain(object): """ return self.get_outputs_filtered(owner, include_spent=False) + @property + def fastquery(self): + return fastquery.FastQuery(self.connection, self.me) + def get_outputs_filtered(self, owner, include_spent=True): """ Get a list of output links filtered on some criteria """ - outputs = self.get_outputs(owner) + outputs = self.fastquery.get_outputs_by_pubkey(owner) if not include_spent: - outputs = [o for o in outputs - if not self.get_spent(o.txid, o.output)] + outputs = self.fastquery.filter_spent_outputs(outputs) return outputs def get_transactions_filtered(self, asset_id, operation=None): diff --git a/bigchaindb/fastquery.py b/bigchaindb/fastquery.py new file mode 100644 index 00000000..f2a8fb09 --- /dev/null +++ b/bigchaindb/fastquery.py @@ -0,0 +1,56 @@ +from bigchaindb.utils import output_has_owner +from bigchaindb.backend import query +from bigchaindb.common.transaction import TransactionLink + + +class FastQuery: + + """ + Database queries that join on block results from a single node. + + * Votes are not validated for security (security is replication concern) + * Votes come from only one node, and as such, fault tolerance is not provided + (elected Blockchain table not yet available) + + In return, these queries offer good performance, as it is not neccesary to validate + each result separately. + """ + + def __init__(self, connection, me): + self.connection = connection + self.me = me + + def filter_block_ids(self, block_ids, include_undecided=True): + votes = query.get_votes_for_blocks_by_voter( + self.connection, block_ids, self.me) + votes = {v['vote']['voting_for_block']: v['vote']['is_block_valid'] for v in votes} + return [b for b in block_ids if votes.get(b, include_undecided)] + + def filter_valid_blocks(self, blocks): + block_ids = list(set(b['id'] for b in blocks)) + valid_block_ids = self.filter_block_ids(block_ids) + return [b for b in blocks if b['id'] in valid_block_ids] + + def get_outputs_by_pubkey(self, pubkey): + cursor = query.get_owned_ids(self.connection, pubkey, unwrap=False) + wrapped_txs = self.filter_valid_blocks(list(cursor)) + txs = [wrapped['block']['transactions'] for wrapped in wrapped_txs] + return [TransactionLink(tx['id'], i) + for tx in txs + for i, o in enumerate(tx['outputs']) + if output_has_owner(o, pubkey)] + + def filter_spent_outputs(self, outputs): + """ + Remove outputs that have been spent + + Args: + outputs: list of TransactionLink + """ + links = [o.to_dict() for o in outputs] + wrapped = self.filter_valid_blocks( + list(query.get_spending_transactions(self.connection, links))) + spends = {TransactionLink.from_dict(input_['fulfills']) + for block in wrapped + for input_ in block['block']['transactions']['inputs']} + return [ff for ff in outputs if ff not in spends] diff --git a/tests/backend/mongodb/test_queries.py b/tests/backend/mongodb/test_queries.py index bd7e75f1..c8a03011 100644 --- a/tests/backend/mongodb/test_queries.py +++ b/tests/backend/mongodb/test_queries.py @@ -417,3 +417,54 @@ def test_get_txids_filtered(signed_create_tx, signed_transfer_tx): # Test get by asset and TRANSFER txids = set(query.get_txids_filtered(conn, asset_id, Transaction.TRANSFER)) assert txids == {signed_transfer_tx.id} + + +def test_get_spending_transactions(user_pk): + from bigchaindb.backend import connect, query + from bigchaindb.models import Block, Transaction + conn = connect() + + out = [([user_pk], 1)] + tx1 = Transaction.create([user_pk], out * 3) + inputs = tx1.to_inputs() + tx2 = Transaction.transfer([inputs[0]], out, tx1.id) + tx3 = Transaction.transfer([inputs[1]], out, tx1.id) + tx4 = Transaction.transfer([inputs[2]], out, tx1.id) + block = Block([tx1, tx2, tx3, tx4]) + conn.db.bigchain.insert_one(block.to_dict()) + + links = [inputs[0].fulfills.to_dict(), inputs[2].fulfills.to_dict()] + # discard block noise + res = [(r['id'], r['block']['transactions']) + for r in list(query.get_spending_transactions(conn, links))] + + # tx3 not a member because input 1 not asked for + assert res == [(block.id, tx2.to_dict()), (block.id, tx4.to_dict())] + + +def test_get_votes_for_blocks_by_voter(): + from bigchaindb.backend import connect, query + + conn = connect() + votes = [ + { + 'node_pubkey': 'a', + 'vote': {'voting_for_block': 'block1'}, + }, + { + 'node_pubkey': 'b', + 'vote': {'voting_for_block': 'block1'}, + }, + { + 'node_pubkey': 'a', + 'vote': {'voting_for_block': 'block2'}, + }, + { + 'node_pubkey': 'a', + 'vote': {'voting_for_block': 'block3'}, + } + ] + for vote in votes: + conn.db.votes.insert_one(vote.copy()) + res = query.get_votes_for_blocks_by_voter(conn, ['block1', 'block2'], 'a') + assert list(res) == [votes[0], votes[2]] diff --git a/tests/db/test_bigchain_api.py b/tests/db/test_bigchain_api.py index 3f05385c..589671e5 100644 --- a/tests/db/test_bigchain_api.py +++ b/tests/db/test_bigchain_api.py @@ -1119,11 +1119,11 @@ def test_get_owned_ids_calls_get_outputs_filtered(): def test_get_outputs_filtered_only_unspent(): from bigchaindb.common.transaction import TransactionLink from bigchaindb.core import Bigchain - with patch('bigchaindb.core.Bigchain.get_outputs') as get_outputs: + with patch('bigchaindb.fastquery.FastQuery.get_outputs_by_pubkey') as get_outputs: get_outputs.return_value = [TransactionLink('a', 1), TransactionLink('b', 2)] - with patch('bigchaindb.core.Bigchain.get_spent') as get_spent: - get_spent.side_effect = [True, False] + with patch('bigchaindb.fastquery.FastQuery.filter_spent_outputs') as filter_spent: + filter_spent.return_value = [TransactionLink('b', 2)] out = Bigchain().get_outputs_filtered('abc', include_spent=False) get_outputs.assert_called_once_with('abc') assert out == [TransactionLink('b', 2)] @@ -1132,13 +1132,13 @@ def test_get_outputs_filtered_only_unspent(): def test_get_outputs_filtered(): from bigchaindb.common.transaction import TransactionLink from bigchaindb.core import Bigchain - with patch('bigchaindb.core.Bigchain.get_outputs') as get_outputs: + with patch('bigchaindb.fastquery.FastQuery.get_outputs_by_pubkey') as get_outputs: get_outputs.return_value = [TransactionLink('a', 1), TransactionLink('b', 2)] - with patch('bigchaindb.core.Bigchain.get_spent') as get_spent: + with patch('bigchaindb.fastquery.FastQuery.filter_spent_outputs') as filter_spent: out = Bigchain().get_outputs_filtered('abc') get_outputs.assert_called_once_with('abc') - get_spent.assert_not_called() + filter_spent.assert_not_called() assert out == get_outputs.return_value diff --git a/tests/test_fastquery.py b/tests/test_fastquery.py new file mode 100644 index 00000000..b730eee9 --- /dev/null +++ b/tests/test_fastquery.py @@ -0,0 +1,80 @@ +import pytest + +from bigchaindb.common.transaction import TransactionLink +from bigchaindb.models import Block, Transaction + +pytestmark = pytest.mark.bdb + + +@pytest.fixture +def blockdata(b, user_pk, user2_pk): + txs = [Transaction.create([user_pk], [([user2_pk], 1)]), + Transaction.create([user2_pk], [([user_pk], 1)]), + Transaction.create([user_pk], [([user_pk], 1), ([user2_pk], 1)])] + blocks = [] + for i in range(3): + block = Block([txs[i]]) + b.write_block(block) + blocks.append(block.to_dict()) + if i > 0: + b.write_vote(b.vote(block.id, '', i % 2 == 1)) + return blocks, [b['id'] for b in blocks] + + +def test_filter_block_ids_with_undecided(b, blockdata): + blocks, block_ids = blockdata + valid_ids = b.fastquery.filter_block_ids(block_ids) + assert set(valid_ids) == {blocks[0]['id'], blocks[1]['id']} + + +def test_filter_block_ids_only_valid(b, blockdata): + blocks, block_ids = blockdata + valid_ids = b.fastquery.filter_block_ids(block_ids, include_undecided=False) + assert set(valid_ids) == {blocks[1]['id']} + + +def test_filter_valid_blocks(b, blockdata): + blocks, _ = blockdata + assert b.fastquery.filter_valid_blocks(blocks) == [blocks[0], blocks[1]] + + +def test_get_outputs_by_pubkey(b, user_pk, user2_pk, blockdata): + blocks, _ = blockdata + assert b.fastquery.get_outputs_by_pubkey(user_pk) == [ + TransactionLink(blocks[1]['block']['transactions'][0]['id'], 0) + ] + assert b.fastquery.get_outputs_by_pubkey(user2_pk) == [ + TransactionLink(blocks[0]['block']['transactions'][0]['id'], 0) + ] + + +def test_filter_spent_outputs(b, user_pk): + out = [([user_pk], 1)] + tx1 = Transaction.create([user_pk], out * 3) + inputs = tx1.to_inputs() + tx2 = Transaction.transfer([inputs[0]], out, tx1.id) + tx3 = Transaction.transfer([inputs[1]], out, tx1.id) + tx4 = Transaction.transfer([inputs[2]], out, tx1.id) + + for tx in [tx1, tx2]: + block = Block([tx]) + b.write_block(block) + b.write_vote(b.vote(block.id, '', True)) + + # mark invalid + block = Block([tx3]) + b.write_block(block) + b.write_vote(b.vote(block.id, '', False)) + + # undecided + block = Block([tx4]) + b.write_block(block) + + unspents = b.fastquery.filter_spent_outputs( + b.fastquery.get_outputs_by_pubkey(user_pk)) + + assert set(unspents) == { + inputs[1].fulfills, + tx2.to_inputs()[0].fulfills, + tx4.to_inputs()[0].fulfills + } From 5b6fa13d798a6bed27fc662f1db6b2a11e7ef885 Mon Sep 17 00:00:00 2001 From: Scott Sadler Date: Thu, 20 Apr 2017 16:58:29 +0200 Subject: [PATCH 02/21] fast unspent queries for RethinkDB --- bigchaindb/backend/rethinkdb/query.py | 38 +++++++++++++++++++++++---- bigchaindb/fastquery.py | 4 +-- tests/test_fastquery.py | 13 ++++++--- 3 files changed, 44 insertions(+), 11 deletions(-) diff --git a/bigchaindb/backend/rethinkdb/query.py b/bigchaindb/backend/rethinkdb/query.py index 6011cc8c..6a2c644e 100644 --- a/bigchaindb/backend/rethinkdb/query.py +++ b/bigchaindb/backend/rethinkdb/query.py @@ -120,14 +120,16 @@ def get_spent(connection, transaction_id, output): @register_query(RethinkDBConnection) -def get_owned_ids(connection, owner): - return connection.run( - r.table('bigchain', read_mode=READ_MODE) +def get_owned_ids(connection, owner, unwrap=True): + query = (r.table('bigchain', read_mode=READ_MODE) .get_all(owner, index='outputs') .distinct() - .concat_map(lambda doc: doc['block']['transactions']) - .filter(lambda tx: tx['outputs'].contains( + .concat_map(unroll_block_transactions) + .filter(lambda doc: doc['block']['transactions']['outputs'].contains( lambda c: c['public_keys'].contains(owner)))) + if unwrap: + query = query.map(lambda doc: doc['block']['transactions']) + return connection.run(query) @register_query(RethinkDBConnection) @@ -253,3 +255,29 @@ def get_unvoted_blocks(connection, node_pubkey): # database level. Solving issue #444 can help untangling the situation unvoted_blocks = filter(lambda block: not utils.is_genesis_block(block), unvoted) return unvoted_blocks + + +@register_query(RethinkDBConnection) +def get_votes_for_blocks_by_voter(connection, block_ids, node_pubkey): + return connection.run( + r.table('votes') + .filter(lambda row: r.expr(block_ids).contains(row['vote']['voting_for_block'])) + .filter(lambda row: row['node_pubkey'] == node_pubkey)) + + +def unroll_block_transactions(block): + """ Simulate unrolling a transaction into block in MongoDB """ + return block['block']['transactions'].map( + lambda tx: block.merge({'block': {'transactions': tx}})) + + +@register_query(RethinkDBConnection) +def get_spending_transactions(connection, links): + query = ( + r.table('bigchain') + .get_all(*[(l['txid'], l['output']) for l in links], index='inputs') + .concat_map(unroll_block_transactions) + .filter(lambda doc: r.expr(links).set_intersection( + doc['block']['transactions']['inputs'].map(lambda i: i['fulfills']))) + ) + return connection.run(query) diff --git a/bigchaindb/fastquery.py b/bigchaindb/fastquery.py index f2a8fb09..7b73ab64 100644 --- a/bigchaindb/fastquery.py +++ b/bigchaindb/fastquery.py @@ -48,8 +48,8 @@ class FastQuery: outputs: list of TransactionLink """ links = [o.to_dict() for o in outputs] - wrapped = self.filter_valid_blocks( - list(query.get_spending_transactions(self.connection, links))) + spending_txs = query.get_spending_transactions(self.connection, links) + wrapped = self.filter_valid_blocks(list(spending_txs)) spends = {TransactionLink.from_dict(input_['fulfills']) for block in wrapped for input_ in block['block']['transactions']['inputs']} diff --git a/tests/test_fastquery.py b/tests/test_fastquery.py index b730eee9..8621751a 100644 --- a/tests/test_fastquery.py +++ b/tests/test_fastquery.py @@ -51,27 +51,32 @@ def test_get_outputs_by_pubkey(b, user_pk, user2_pk, blockdata): def test_filter_spent_outputs(b, user_pk): out = [([user_pk], 1)] tx1 = Transaction.create([user_pk], out * 3) + + # There are 3 inputs inputs = tx1.to_inputs() + + # Each spent individually tx2 = Transaction.transfer([inputs[0]], out, tx1.id) tx3 = Transaction.transfer([inputs[1]], out, tx1.id) tx4 = Transaction.transfer([inputs[2]], out, tx1.id) + # The CREATE and first TRANSFER are valid. tx2 produces a new unspent. for tx in [tx1, tx2]: block = Block([tx]) b.write_block(block) b.write_vote(b.vote(block.id, '', True)) - # mark invalid + # The second TRANSFER is invalid. inputs[1] remains unspent. block = Block([tx3]) b.write_block(block) b.write_vote(b.vote(block.id, '', False)) - # undecided + # The third TRANSFER is undecided. It procuces a new unspent. block = Block([tx4]) b.write_block(block) - unspents = b.fastquery.filter_spent_outputs( - b.fastquery.get_outputs_by_pubkey(user_pk)) + outputs = b.fastquery.get_outputs_by_pubkey(user_pk) + unspents = b.fastquery.filter_spent_outputs(outputs) assert set(unspents) == { inputs[1].fulfills, From 29247a9994758e2d511a2f686abb3cc4b8f94e54 Mon Sep 17 00:00:00 2001 From: Scott Sadler Date: Thu, 20 Apr 2017 16:58:40 +0200 Subject: [PATCH 03/21] test clarification --- tests/test_fastquery.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_fastquery.py b/tests/test_fastquery.py index 8621751a..200bd7f9 100644 --- a/tests/test_fastquery.py +++ b/tests/test_fastquery.py @@ -16,8 +16,8 @@ def blockdata(b, user_pk, user2_pk): block = Block([txs[i]]) b.write_block(block) blocks.append(block.to_dict()) - if i > 0: - b.write_vote(b.vote(block.id, '', i % 2 == 1)) + b.write_vote(b.vote(blocks[1]['id'], '', True)) + b.write_vote(b.vote(blocks[2]['id'], '', False)) return blocks, [b['id'] for b in blocks] From 5d767c1162effa3f657f5d6c5c5a3594bdc11662 Mon Sep 17 00:00:00 2001 From: Scott Sadler Date: Fri, 21 Apr 2017 12:23:53 +0200 Subject: [PATCH 04/21] light refactoring to remove some redundancies and fix test coverage --- bigchaindb/backend/mongodb/query.py | 11 +++--- bigchaindb/backend/rethinkdb/query.py | 19 +++++------ bigchaindb/core.py | 48 +-------------------------- bigchaindb/fastquery.py | 32 ++++++++++-------- tests/backend/mongodb/test_queries.py | 9 +++-- tests/backend/test_generics.py | 2 ++ tests/test_fastquery.py | 3 +- 7 files changed, 41 insertions(+), 83 deletions(-) diff --git a/bigchaindb/backend/mongodb/query.py b/bigchaindb/backend/mongodb/query.py index d564e242..0eab8c6b 100644 --- a/bigchaindb/backend/mongodb/query.py +++ b/bigchaindb/backend/mongodb/query.py @@ -177,7 +177,7 @@ def get_spent(conn, transaction_id, output): @register_query(MongoDBConnection) def get_spending_transactions(conn, inputs): - return conn.run( + cursor = conn.run( conn.collection('bigchain').aggregate([ {'$match': { 'block.transactions.inputs.fulfills': { @@ -191,21 +191,18 @@ def get_spending_transactions(conn, inputs): }, }}, ])) + return ((b['id'], b['block']['transactions']) for b in cursor) @register_query(MongoDBConnection) -def get_owned_ids(conn, owner, unwrap=True): +def get_owned_ids(conn, owner): cursor = conn.run( conn.collection('bigchain').aggregate([ {'$match': {'block.transactions.outputs.public_keys': owner}}, {'$unwind': '$block.transactions'}, {'$match': {'block.transactions.outputs.public_keys': owner}} ])) - if not unwrap: - return cursor - # we need to access some nested fields before returning so lets use a - # generator to avoid having to read all records on the cursor at this point - return (elem['block']['transactions'] for elem in cursor) + return ((b['id'], b['block']['transactions']) for b in cursor) @register_query(MongoDBConnection) diff --git a/bigchaindb/backend/rethinkdb/query.py b/bigchaindb/backend/rethinkdb/query.py index 6a2c644e..fca65bdf 100644 --- a/bigchaindb/backend/rethinkdb/query.py +++ b/bigchaindb/backend/rethinkdb/query.py @@ -120,16 +120,15 @@ def get_spent(connection, transaction_id, output): @register_query(RethinkDBConnection) -def get_owned_ids(connection, owner, unwrap=True): +def get_owned_ids(connection, owner): query = (r.table('bigchain', read_mode=READ_MODE) .get_all(owner, index='outputs') .distinct() .concat_map(unroll_block_transactions) - .filter(lambda doc: doc['block']['transactions']['outputs'].contains( + .filter(lambda doc: doc['tx']['outputs'].contains( lambda c: c['public_keys'].contains(owner)))) - if unwrap: - query = query.map(lambda doc: doc['block']['transactions']) - return connection.run(query) + cursor = connection.run(query) + return ((b['id'], b['tx']) for b in cursor) @register_query(RethinkDBConnection) @@ -266,9 +265,8 @@ def get_votes_for_blocks_by_voter(connection, block_ids, node_pubkey): def unroll_block_transactions(block): - """ Simulate unrolling a transaction into block in MongoDB """ - return block['block']['transactions'].map( - lambda tx: block.merge({'block': {'transactions': tx}})) + """ Unroll block transactions """ + return block['block']['transactions'].map(lambda tx: block.merge({'tx': tx})) @register_query(RethinkDBConnection) @@ -278,6 +276,7 @@ def get_spending_transactions(connection, links): .get_all(*[(l['txid'], l['output']) for l in links], index='inputs') .concat_map(unroll_block_transactions) .filter(lambda doc: r.expr(links).set_intersection( - doc['block']['transactions']['inputs'].map(lambda i: i['fulfills']))) + doc['tx']['inputs'].map(lambda i: i['fulfills']))) ) - return connection.run(query) + cursor = connection.run(query) + return ((b['id'], b['tx']) for b in cursor) diff --git a/bigchaindb/core.py b/bigchaindb/core.py index be7cdcbc..63cab1d2 100644 --- a/bigchaindb/core.py +++ b/bigchaindb/core.py @@ -4,11 +4,10 @@ from time import time from bigchaindb import exceptions as core_exceptions from bigchaindb.common import crypto, exceptions from bigchaindb.common.utils import gen_timestamp, serialize -from bigchaindb.common.transaction import TransactionLink import bigchaindb -from bigchaindb import backend, config_utils, utils, fastquery +from bigchaindb import backend, config_utils, fastquery from bigchaindb.consensus import BaseConsensusRules from bigchaindb.models import Block, Transaction @@ -376,51 +375,6 @@ class Bigchain(object): # Either no transaction was returned spending the `(txid, output)` as # input or the returned transactions are not valid. - def get_outputs(self, owner): - """Retrieve a list of links to transaction outputs for a given public - key. - - Args: - owner (str): base58 encoded public key. - - Returns: - :obj:`list` of TransactionLink: list of ``txid`` s and ``output`` s - pointing to another transaction's condition - """ - # get all transactions in which owner is in the `owners_after` list - response = backend.query.get_owned_ids(self.connection, owner) - return [ - TransactionLink(tx['id'], index) - for tx in response - if not self.is_tx_strictly_in_invalid_block(tx['id']) - for index, output in enumerate(tx['outputs']) - if utils.output_has_owner(output, owner) - ] - - def is_tx_strictly_in_invalid_block(self, txid): - """ - Checks whether the transaction with the given ``txid`` - *strictly* belongs to an invalid block. - - Args: - txid (str): Transaction id. - - Returns: - bool: ``True`` if the transaction *strictly* belongs to a - block that is invalid. ``False`` otherwise. - - Note: - Since a transaction may be in multiple blocks, with - different statuses, the term "strictly" is used to - emphasize that if a transaction is said to be in an invalid - block, it means that it is not in any other block that is - either valid or undecided. - - """ - validity = self.get_blocks_status_containing_tx(txid) - return (Bigchain.BLOCK_VALID not in validity.values() and - Bigchain.BLOCK_UNDECIDED not in validity.values()) - def get_owned_ids(self, owner): """Retrieve a list of ``txid`` s that can be used as inputs. diff --git a/bigchaindb/fastquery.py b/bigchaindb/fastquery.py index 7b73ab64..a7147722 100644 --- a/bigchaindb/fastquery.py +++ b/bigchaindb/fastquery.py @@ -9,8 +9,7 @@ class FastQuery: Database queries that join on block results from a single node. * Votes are not validated for security (security is replication concern) - * Votes come from only one node, and as such, fault tolerance is not provided - (elected Blockchain table not yet available) + * Votes come from only one node, and as such, fault tolerance is reduced In return, these queries offer good performance, as it is not neccesary to validate each result separately. @@ -21,20 +20,27 @@ class FastQuery: self.me = me def filter_block_ids(self, block_ids, include_undecided=True): + """ + Given block ids, filter the invalid blocks + """ + block_ids = list(set(block_ids)) votes = query.get_votes_for_blocks_by_voter( self.connection, block_ids, self.me) votes = {v['vote']['voting_for_block']: v['vote']['is_block_valid'] for v in votes} return [b for b in block_ids if votes.get(b, include_undecided)] - def filter_valid_blocks(self, blocks): - block_ids = list(set(b['id'] for b in blocks)) - valid_block_ids = self.filter_block_ids(block_ids) - return [b for b in blocks if b['id'] in valid_block_ids] + def filter_valid_blocks(self, blocks, key=lambda b: b[0]): + """ + Given things with block ids, remove the invalid ones. + """ + blocks = list(blocks) + valid_block_ids = set(self.filter_block_ids(key(b) for b in blocks)) + return [b for b in blocks if key(b) in valid_block_ids] def get_outputs_by_pubkey(self, pubkey): - cursor = query.get_owned_ids(self.connection, pubkey, unwrap=False) - wrapped_txs = self.filter_valid_blocks(list(cursor)) - txs = [wrapped['block']['transactions'] for wrapped in wrapped_txs] + """ Get outputs for a public key """ + res = list(query.get_owned_ids(self.connection, pubkey)) + txs = [tx for _, tx in self.filter_valid_blocks(res)] return [TransactionLink(tx['id'], i) for tx in txs for i, o in enumerate(tx['outputs']) @@ -48,9 +54,9 @@ class FastQuery: outputs: list of TransactionLink """ links = [o.to_dict() for o in outputs] - spending_txs = query.get_spending_transactions(self.connection, links) - wrapped = self.filter_valid_blocks(list(spending_txs)) + res = query.get_spending_transactions(self.connection, links) + txs = [tx for _, tx in self.filter_valid_blocks(res)] spends = {TransactionLink.from_dict(input_['fulfills']) - for block in wrapped - for input_ in block['block']['transactions']['inputs']} + for tx in txs + for input_ in tx['inputs']} return [ff for ff in outputs if ff not in spends] diff --git a/tests/backend/mongodb/test_queries.py b/tests/backend/mongodb/test_queries.py index c8a03011..426ac12d 100644 --- a/tests/backend/mongodb/test_queries.py +++ b/tests/backend/mongodb/test_queries.py @@ -205,10 +205,10 @@ def test_get_owned_ids(signed_create_tx, user_pk): block = Block(transactions=[signed_create_tx]) conn.db.bigchain.insert_one(block.to_dict()) - owned_ids = list(query.get_owned_ids(conn, user_pk)) + [(block_id, tx)] = list(query.get_owned_ids(conn, user_pk)) - assert len(owned_ids) == 1 - assert owned_ids[0] == signed_create_tx.to_dict() + assert block_id == block.id + assert tx == signed_create_tx.to_dict() def test_get_votes_by_block_id(signed_create_tx, structurally_valid_vote): @@ -435,8 +435,7 @@ def test_get_spending_transactions(user_pk): links = [inputs[0].fulfills.to_dict(), inputs[2].fulfills.to_dict()] # discard block noise - res = [(r['id'], r['block']['transactions']) - for r in list(query.get_spending_transactions(conn, links))] + res = list(query.get_spending_transactions(conn, links)) # tx3 not a member because input 1 not asked for assert res == [(block.id, tx2.to_dict()), (block.id, tx4.to_dict())] diff --git a/tests/backend/test_generics.py b/tests/backend/test_generics.py index 57a644ee..581a0b31 100644 --- a/tests/backend/test_generics.py +++ b/tests/backend/test_generics.py @@ -36,6 +36,8 @@ def test_schema(schema_func_name, args_qty): ('get_votes_by_block_id_and_voter', 2), ('update_transaction', 2), ('get_transaction_from_block', 2), + ('get_votes_for_blocks_by_voter', 2), + ('get_spending_transactions', 1), )) def test_query(query_func_name, args_qty): from bigchaindb.backend import query diff --git a/tests/test_fastquery.py b/tests/test_fastquery.py index 200bd7f9..c54ef10e 100644 --- a/tests/test_fastquery.py +++ b/tests/test_fastquery.py @@ -35,7 +35,8 @@ def test_filter_block_ids_only_valid(b, blockdata): def test_filter_valid_blocks(b, blockdata): blocks, _ = blockdata - assert b.fastquery.filter_valid_blocks(blocks) == [blocks[0], blocks[1]] + assert (b.fastquery.filter_valid_blocks(blocks, key=lambda b: b['id']) + == [blocks[0], blocks[1]]) def test_get_outputs_by_pubkey(b, user_pk, user2_pk, blockdata): From 2200a7bda417e4d5ef4fd1f3297e7231249dbb85 Mon Sep 17 00:00:00 2001 From: Scott Sadler Date: Mon, 24 Apr 2017 12:21:00 +0200 Subject: [PATCH 05/21] cleanup --- bigchaindb/backend/query.py | 1 - bigchaindb/backend/rethinkdb/query.py | 9 +++++---- tests/backend/mongodb/test_queries.py | 1 - 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/bigchaindb/backend/query.py b/bigchaindb/backend/query.py index 5ae39557..0f746132 100644 --- a/bigchaindb/backend/query.py +++ b/bigchaindb/backend/query.py @@ -207,7 +207,6 @@ def get_votes_for_blocks_by_voter(connection, block_ids, pubkey): Returns: A cursor of votes matching given votes. """ - raise NotImplementedError diff --git a/bigchaindb/backend/rethinkdb/query.py b/bigchaindb/backend/rethinkdb/query.py index fca65bdf..18116fc1 100644 --- a/bigchaindb/backend/rethinkdb/query.py +++ b/bigchaindb/backend/rethinkdb/query.py @@ -124,7 +124,7 @@ def get_owned_ids(connection, owner): query = (r.table('bigchain', read_mode=READ_MODE) .get_all(owner, index='outputs') .distinct() - .concat_map(unroll_block_transactions) + .concat_map(unwind_block_transactions) .filter(lambda doc: doc['tx']['outputs'].contains( lambda c: c['public_keys'].contains(owner)))) cursor = connection.run(query) @@ -264,8 +264,8 @@ def get_votes_for_blocks_by_voter(connection, block_ids, node_pubkey): .filter(lambda row: row['node_pubkey'] == node_pubkey)) -def unroll_block_transactions(block): - """ Unroll block transactions """ +def unwind_block_transactions(block): + """ Yield a block for each transaction in given block """ return block['block']['transactions'].map(lambda tx: block.merge({'tx': tx})) @@ -274,7 +274,8 @@ def get_spending_transactions(connection, links): query = ( r.table('bigchain') .get_all(*[(l['txid'], l['output']) for l in links], index='inputs') - .concat_map(unroll_block_transactions) + .concat_map(unwind_block_transactions) + # filter transactions spending output .filter(lambda doc: r.expr(links).set_intersection( doc['tx']['inputs'].map(lambda i: i['fulfills']))) ) diff --git a/tests/backend/mongodb/test_queries.py b/tests/backend/mongodb/test_queries.py index 426ac12d..0c295b9b 100644 --- a/tests/backend/mongodb/test_queries.py +++ b/tests/backend/mongodb/test_queries.py @@ -434,7 +434,6 @@ def test_get_spending_transactions(user_pk): conn.db.bigchain.insert_one(block.to_dict()) links = [inputs[0].fulfills.to_dict(), inputs[2].fulfills.to_dict()] - # discard block noise res = list(query.get_spending_transactions(conn, links)) # tx3 not a member because input 1 not asked for From 01ba01083d26ba2c1d22eeedd4089a718f9589a9 Mon Sep 17 00:00:00 2001 From: Scott Sadler Date: Tue, 25 Apr 2017 13:12:32 +0200 Subject: [PATCH 06/21] update comments --- bigchaindb/backend/query.py | 9 +++++---- bigchaindb/fastquery.py | 17 ++++++++++------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/bigchaindb/backend/query.py b/bigchaindb/backend/query.py index 0f746132..6f76249b 100644 --- a/bigchaindb/backend/query.py +++ b/bigchaindb/backend/query.py @@ -148,7 +148,8 @@ def get_spending_transactions(connection, inputs): inputs (list): list of {txid, output} Returns: - List of transactions that spend given inputs + Iterator of (block_ids, transaction) for transactions that + spend given inputs. """ raise NotImplementedError @@ -161,9 +162,9 @@ def get_owned_ids(connection, owner): owner (str): base58 encoded public key. Returns: - A cursor for the matching transactions. + Iterator of (block_id, transaction) for transactions + that list given owner in conditions. """ - raise NotImplementedError @@ -205,7 +206,7 @@ def get_votes_for_blocks_by_voter(connection, block_ids, pubkey): pubkey (str): public key of voting node Returns: - A cursor of votes matching given votes. + A cursor of votes matching given block_ids and public key """ raise NotImplementedError diff --git a/bigchaindb/fastquery.py b/bigchaindb/fastquery.py index a7147722..deffabe1 100644 --- a/bigchaindb/fastquery.py +++ b/bigchaindb/fastquery.py @@ -4,24 +4,27 @@ from bigchaindb.common.transaction import TransactionLink class FastQuery: - """ Database queries that join on block results from a single node. - * Votes are not validated for security (security is replication concern) - * Votes come from only one node, and as such, fault tolerance is reduced + * Votes are not validated for security (security is a replication concern) + * Votes come from only one node, and as such, non-byzantine fault tolerance + is reduced. - In return, these queries offer good performance, as it is not neccesary to validate - each result separately. + Previously, to consider the status of a block, all votes for that block + were retrieved and the election results were counted. This meant that a + faulty node may still have been able to obtain a correct election result. + However, from the point of view of a client, it is still neccesary to + query multiple nodes to insure against getting an incorrect response from + a byzantine node. """ - def __init__(self, connection, me): self.connection = connection self.me = me def filter_block_ids(self, block_ids, include_undecided=True): """ - Given block ids, filter the invalid blocks + Given block ids, filter the invalid blocks. """ block_ids = list(set(block_ids)) votes = query.get_votes_for_blocks_by_voter( From b4f14b26ce264e7c24053b6e9be4cbea21ade054 Mon Sep 17 00:00:00 2001 From: Scott Sadler Date: Mon, 8 May 2017 14:25:39 +0200 Subject: [PATCH 07/21] address nomenclature issues --- bigchaindb/core.py | 2 +- bigchaindb/fastquery.py | 39 ++++++++++++++++++++--------------- tests/db/test_bigchain_api.py | 4 ++-- tests/test_fastquery.py | 20 +++++++++--------- 4 files changed, 35 insertions(+), 30 deletions(-) diff --git a/bigchaindb/core.py b/bigchaindb/core.py index 63cab1d2..91666224 100644 --- a/bigchaindb/core.py +++ b/bigchaindb/core.py @@ -395,7 +395,7 @@ class Bigchain(object): """ Get a list of output links filtered on some criteria """ - outputs = self.fastquery.get_outputs_by_pubkey(owner) + outputs = self.fastquery.get_outputs_by_public_key(owner) if not include_spent: outputs = self.fastquery.filter_spent_outputs(outputs) return outputs diff --git a/bigchaindb/fastquery.py b/bigchaindb/fastquery.py index deffabe1..d19294ce 100644 --- a/bigchaindb/fastquery.py +++ b/bigchaindb/fastquery.py @@ -22,32 +22,37 @@ class FastQuery: self.connection = connection self.me = me - def filter_block_ids(self, block_ids, include_undecided=True): + def filter_valid_block_ids(self, block_ids, include_undecided=False): """ - Given block ids, filter the invalid blocks. + Given block ids, return only the ones that are valid. """ block_ids = list(set(block_ids)) votes = query.get_votes_for_blocks_by_voter( self.connection, block_ids, self.me) - votes = {v['vote']['voting_for_block']: v['vote']['is_block_valid'] for v in votes} - return [b for b in block_ids if votes.get(b, include_undecided)] + votes = {vote['vote']['voting_for_block']: vote['vote']['is_block_valid'] + for vote in votes} + return [block_id for block_id in block_ids + if votes.get(block_id, include_undecided)] - def filter_valid_blocks(self, blocks, key=lambda b: b[0]): + def filter_valid_items(self, items, block_id_key=lambda b: b[0]): """ - Given things with block ids, remove the invalid ones. + Given items with block ids, return only the ones that are valid or undecided. """ - blocks = list(blocks) - valid_block_ids = set(self.filter_block_ids(key(b) for b in blocks)) - return [b for b in blocks if key(b) in valid_block_ids] + items = list(items) + block_ids = map(block_id_key, items) + valid_block_ids = set(self.filter_valid_block_ids(block_ids, True)) + return [b for b in items if block_id_key(b) in valid_block_ids] - def get_outputs_by_pubkey(self, pubkey): - """ Get outputs for a public key """ - res = list(query.get_owned_ids(self.connection, pubkey)) - txs = [tx for _, tx in self.filter_valid_blocks(res)] - return [TransactionLink(tx['id'], i) + def get_outputs_by_public_key(self, public_key): + """ + Get outputs for a public key + """ + res = list(query.get_owned_ids(self.connection, public_key)) + txs = [tx for _, tx in self.filter_valid_items(res)] + return [TransactionLink(tx['id'], index) for tx in txs - for i, o in enumerate(tx['outputs']) - if output_has_owner(o, pubkey)] + for index, output in enumerate(tx['outputs']) + if output_has_owner(output, public_key)] def filter_spent_outputs(self, outputs): """ @@ -58,7 +63,7 @@ class FastQuery: """ links = [o.to_dict() for o in outputs] res = query.get_spending_transactions(self.connection, links) - txs = [tx for _, tx in self.filter_valid_blocks(res)] + txs = [tx for _, tx in self.filter_valid_items(res)] spends = {TransactionLink.from_dict(input_['fulfills']) for tx in txs for input_ in tx['inputs']} diff --git a/tests/db/test_bigchain_api.py b/tests/db/test_bigchain_api.py index 589671e5..8d17a9e7 100644 --- a/tests/db/test_bigchain_api.py +++ b/tests/db/test_bigchain_api.py @@ -1119,7 +1119,7 @@ def test_get_owned_ids_calls_get_outputs_filtered(): def test_get_outputs_filtered_only_unspent(): from bigchaindb.common.transaction import TransactionLink from bigchaindb.core import Bigchain - with patch('bigchaindb.fastquery.FastQuery.get_outputs_by_pubkey') as get_outputs: + with patch('bigchaindb.fastquery.FastQuery.get_outputs_by_public_key') as get_outputs: get_outputs.return_value = [TransactionLink('a', 1), TransactionLink('b', 2)] with patch('bigchaindb.fastquery.FastQuery.filter_spent_outputs') as filter_spent: @@ -1132,7 +1132,7 @@ def test_get_outputs_filtered_only_unspent(): def test_get_outputs_filtered(): from bigchaindb.common.transaction import TransactionLink from bigchaindb.core import Bigchain - with patch('bigchaindb.fastquery.FastQuery.get_outputs_by_pubkey') as get_outputs: + with patch('bigchaindb.fastquery.FastQuery.get_outputs_by_public_key') as get_outputs: get_outputs.return_value = [TransactionLink('a', 1), TransactionLink('b', 2)] with patch('bigchaindb.fastquery.FastQuery.filter_spent_outputs') as filter_spent: diff --git a/tests/test_fastquery.py b/tests/test_fastquery.py index c54ef10e..8fb3378c 100644 --- a/tests/test_fastquery.py +++ b/tests/test_fastquery.py @@ -21,30 +21,30 @@ def blockdata(b, user_pk, user2_pk): return blocks, [b['id'] for b in blocks] -def test_filter_block_ids_with_undecided(b, blockdata): +def test_filter_valid_block_ids_with_undecided(b, blockdata): blocks, block_ids = blockdata - valid_ids = b.fastquery.filter_block_ids(block_ids) + valid_ids = b.fastquery.filter_valid_block_ids(block_ids, include_undecided=True) assert set(valid_ids) == {blocks[0]['id'], blocks[1]['id']} -def test_filter_block_ids_only_valid(b, blockdata): +def test_filter_valid_block_ids_only_valid(b, blockdata): blocks, block_ids = blockdata - valid_ids = b.fastquery.filter_block_ids(block_ids, include_undecided=False) + valid_ids = b.fastquery.filter_valid_block_ids(block_ids) assert set(valid_ids) == {blocks[1]['id']} -def test_filter_valid_blocks(b, blockdata): +def test_filter_valid_items(b, blockdata): blocks, _ = blockdata - assert (b.fastquery.filter_valid_blocks(blocks, key=lambda b: b['id']) + assert (b.fastquery.filter_valid_items(blocks, block_id_key=lambda b: b['id']) == [blocks[0], blocks[1]]) -def test_get_outputs_by_pubkey(b, user_pk, user2_pk, blockdata): +def test_get_outputs_by_public_key(b, user_pk, user2_pk, blockdata): blocks, _ = blockdata - assert b.fastquery.get_outputs_by_pubkey(user_pk) == [ + assert b.fastquery.get_outputs_by_public_key(user_pk) == [ TransactionLink(blocks[1]['block']['transactions'][0]['id'], 0) ] - assert b.fastquery.get_outputs_by_pubkey(user2_pk) == [ + assert b.fastquery.get_outputs_by_public_key(user2_pk) == [ TransactionLink(blocks[0]['block']['transactions'][0]['id'], 0) ] @@ -76,7 +76,7 @@ def test_filter_spent_outputs(b, user_pk): block = Block([tx4]) b.write_block(block) - outputs = b.fastquery.get_outputs_by_pubkey(user_pk) + outputs = b.fastquery.get_outputs_by_public_key(user_pk) unspents = b.fastquery.filter_spent_outputs(outputs) assert set(unspents) == { From 92392b51a7a6e1eda3263d17d5ca3ffc2bf46f2c Mon Sep 17 00:00:00 2001 From: Rodolphe Marques Date: Wed, 10 May 2017 16:43:52 +0200 Subject: [PATCH 08/21] Initial implementation to decouple assets from transactions. Most changes done to how we write and read blocks to the database. Created schema, indexes and queries for mongodb. Fixed tests. --- bigchaindb/backend/mongodb/query.py | 38 +++++++++++++++--- bigchaindb/backend/mongodb/schema.py | 13 ++++++- bigchaindb/backend/query.py | 14 ++++++- bigchaindb/backend/rethinkdb/query.py | 2 +- bigchaindb/core.py | 48 ++++++++++++++++++----- bigchaindb/models.py | 56 +++++++++++++++++++++++++++ bigchaindb/pipelines/vote.py | 2 +- tests/backend/mongodb/test_queries.py | 13 ++++--- tests/backend/mongodb/test_schema.py | 9 ++++- tests/backend/test_generics.py | 4 +- tests/db/test_bigchain_api.py | 2 +- tests/pipelines/test_vote.py | 18 +++++++-- 12 files changed, 187 insertions(+), 32 deletions(-) diff --git a/bigchaindb/backend/mongodb/query.py b/bigchaindb/backend/mongodb/query.py index 74b9c35a..39d99d4a 100644 --- a/bigchaindb/backend/mongodb/query.py +++ b/bigchaindb/backend/mongodb/query.py @@ -7,7 +7,7 @@ from pymongo import ReturnDocument from bigchaindb import backend from bigchaindb.common.exceptions import CyclicBlockchainError from bigchaindb.common.transaction import Transaction -from bigchaindb.backend.exceptions import DuplicateKeyError +from bigchaindb.backend.exceptions import DuplicateKeyError, OperationError from bigchaindb.backend.utils import module_dispatch_registrar from bigchaindb.backend.mongodb.connection import MongoDBConnection @@ -127,6 +127,7 @@ def get_txids_filtered(conn, asset_id, operation=None): return (elem['block']['transactions']['id'] for elem in cursor) +# TODO: This doesn't seem to be used anywhere @register_query(MongoDBConnection) def get_asset_by_id(conn, asset_id): cursor = conn.run( @@ -206,10 +207,10 @@ def get_votes_by_block_id_and_voter(conn, block_id, node_pubkey): @register_query(MongoDBConnection) -def write_block(conn, block): +def write_block(conn, block_dict): return conn.run( conn.collection('bigchain') - .insert_one(block.to_dict())) + .insert_one(block_dict)) @register_query(MongoDBConnection) @@ -220,6 +221,31 @@ def get_block(conn, block_id): projection={'_id': False})) +@register_query(MongoDBConnection) +def write_assets(conn, assets): + try: + # unordered means that all the inserts will be attempted instead of + # stopping after the first error. + return conn.run( + conn.collection('assets') + .insert_many(assets, ordered=False)) + # This can happen if we try to write the same asset multiple times. + # One case is when we write the same transaction into multiple blocks due + # to invalid blocks. + # The actual mongodb exception is a BulkWriteError due to a duplicated key + # in one of the inserts. + except OperationError: + return + + +@register_query(MongoDBConnection) +def get_assets(conn, asset_ids): + return conn.run( + conn.collection('assets') + .find({'id': {'$in': asset_ids}}, + projection={'_id': False})) + + @register_query(MongoDBConnection) def count_blocks(conn): return conn.run( @@ -252,7 +278,7 @@ def get_genesis_block(conn): @register_query(MongoDBConnection) -def get_last_voted_block(conn, node_pubkey): +def get_last_voted_block_id(conn, node_pubkey): last_voted = conn.run( conn.collection('votes') .find({'node_pubkey': node_pubkey}, @@ -261,7 +287,7 @@ def get_last_voted_block(conn, node_pubkey): # pymongo seems to return a cursor even if there are no results # so we actually need to check the count if last_voted.count() == 0: - return get_genesis_block(conn) + return get_genesis_block(conn)['id'] mapping = {v['vote']['previous_block']: v['vote']['voting_for_block'] for v in last_voted} @@ -279,7 +305,7 @@ def get_last_voted_block(conn, node_pubkey): except KeyError: break - return get_block(conn, last_block_id) + return last_block_id @register_query(MongoDBConnection) diff --git a/bigchaindb/backend/mongodb/schema.py b/bigchaindb/backend/mongodb/schema.py index ad89f9bc..527476f0 100644 --- a/bigchaindb/backend/mongodb/schema.py +++ b/bigchaindb/backend/mongodb/schema.py @@ -27,7 +27,7 @@ def create_database(conn, dbname): @register_schema(MongoDBConnection) def create_tables(conn, dbname): - for table_name in ['bigchain', 'backlog', 'votes']: + for table_name in ['bigchain', 'backlog', 'votes', 'assets']: logger.info('Create `%s` table.', table_name) # create the table # TODO: read and write concerns can be declared here @@ -39,6 +39,7 @@ def create_indexes(conn, dbname): create_bigchain_secondary_index(conn, dbname) create_backlog_secondary_index(conn, dbname) create_votes_secondary_index(conn, dbname) + create_assets_secondary_index(conn, dbname) @register_schema(MongoDBConnection) @@ -102,3 +103,13 @@ def create_votes_secondary_index(conn, dbname): ASCENDING)], name='block_and_voter', unique=True) + + +def create_assets_secondary_index(conn, dbname): + logger.info('Create `assets` secondary index.') + + # is the first index redundant then? + # compound index to order votes by block id and node + conn.conn[dbname]['assets'].create_index('id', + name='asset_id', + unique=True) diff --git a/bigchaindb/backend/query.py b/bigchaindb/backend/query.py index 9aa653d7..8f1325ef 100644 --- a/bigchaindb/backend/query.py +++ b/bigchaindb/backend/query.py @@ -211,6 +211,18 @@ def get_block(connection, block_id): raise NotImplementedError +@singledispatch +def write_assets(connection, assets): + # TODO: write docstring + raise NotImplementedError + + +@singledispatch +def get_assets(connection, assets): + # TODO: write docstring + raise NotImplementedError + + @singledispatch def count_blocks(connection): """Count the number of blocks in the bigchain table. @@ -259,7 +271,7 @@ def get_genesis_block(connection): @singledispatch -def get_last_voted_block(connection, node_pubkey): +def get_last_voted_block_id(connection, node_pubkey): """Get the last voted block for a specific node. Args: diff --git a/bigchaindb/backend/rethinkdb/query.py b/bigchaindb/backend/rethinkdb/query.py index 6011cc8c..be20442a 100644 --- a/bigchaindb/backend/rethinkdb/query.py +++ b/bigchaindb/backend/rethinkdb/query.py @@ -188,7 +188,7 @@ def get_genesis_block(connection): @register_query(RethinkDBConnection) -def get_last_voted_block(connection, node_pubkey): +def get_last_voted_block_id(connection, node_pubkey): try: # get the latest value for the vote timestamp (over all votes) max_timestamp = connection.run( diff --git a/bigchaindb/core.py b/bigchaindb/core.py index 5d2e9c03..c6ff5608 100644 --- a/bigchaindb/core.py +++ b/bigchaindb/core.py @@ -183,15 +183,23 @@ class Bigchain(object): include_status (bool): also return the status of the block the return value is then a tuple: (block, status) """ - block = backend.query.get_block(self.connection, block_id) - status = None + # get block from database + block_dict = backend.query.get_block(self.connection, block_id) + # get the asset ids from the block + if block_dict: + asset_ids = Block.get_asset_ids(block_dict) + # get the assets from the database + assets = self.get_assets(asset_ids) + # add the assets to the block transactions + block_dict = Block.couple_assets(block_dict, assets) + status = None if include_status: - if block: - status = self.block_election_status(block) - return block, status + if block_dict: + status = self.block_election_status(block_dict) + return block_dict, status else: - return block + return block_dict def get_transaction(self, txid, include_status=False): """Get the transaction with the specified `txid` (and optionally its status) @@ -251,7 +259,13 @@ class Bigchain(object): tx_status = self.TX_IN_BACKLOG if response: - response = Transaction.from_dict(response) + if tx_status == self.TX_IN_BACKLOG: + response = Transaction.from_dict(response) + else: + # If we are reading from the bigchain collection the asset is + # not in the transaction so we need to fetch the asset and + # reconstruct the transaction. + response = Transaction.from_db(self, response) if include_status: return response, tx_status @@ -513,7 +527,14 @@ class Bigchain(object): block (Block): block to write to bigchain. """ - return backend.query.write_block(self.connection, block) + # Decouple assets from block + assets, block_dict = block.decouple_assets() + # write the assets + if assets: + self.write_assets(assets) + + # write the block + return backend.query.write_block(self.connection, block_dict) def prepare_genesis_block(self): """Prepare a genesis block.""" @@ -592,7 +613,9 @@ class Bigchain(object): def get_last_voted_block(self): """Returns the last block that this node voted on.""" - return Block.from_dict(backend.query.get_last_voted_block(self.connection, self.me)) + last_block_id = backend.query.get_last_voted_block_id(self.connection, + self.me) + return Block.from_dict(self.get_block(last_block_id)) def get_unvoted_blocks(self): """Return all the blocks that have not been voted on by this node. @@ -616,3 +639,10 @@ class Bigchain(object): """Tally the votes on a block, and return the status: valid, invalid, or undecided.""" return self.block_election(block)['status'] + + def get_assets(self, asset_ids): + # TODO: write docstrings + return backend.query.get_assets(self.connection, asset_ids) + + def write_assets(self, assets): + return backend.query.write_assets(self.connection, assets) diff --git a/bigchaindb/models.py b/bigchaindb/models.py index 2f46ba20..5de56aaf 100644 --- a/bigchaindb/models.py +++ b/bigchaindb/models.py @@ -1,3 +1,5 @@ +from copy import deepcopy + from bigchaindb.common.crypto import hash_data, PublicKey, PrivateKey from bigchaindb.common.exceptions import (InvalidHash, InvalidSignature, DoubleSpend, InputDoesNotExist, @@ -84,6 +86,16 @@ class Transaction(Transaction): validate_transaction_schema(tx_body) return super().from_dict(tx_body) + @classmethod + def from_db(cls, bigchain, tx_dict): + # TODO: write docstring + if tx_dict['operation'] in [Transaction.CREATE, Transaction.CREATE]: + asset = bigchain.get_assets([tx_dict['id']])[0] + asset.pop('id') + tx_dict.update({'asset': asset}) + + return cls.from_dict(tx_dict) + class Block(object): """Bundle a list of Transactions in a Block. Nodes vote on its validity. @@ -300,5 +312,49 @@ class Block(object): 'signature': self.signature, } + @classmethod + def from_db(cls, bigchain, block_dict): + asset_ids = cls.get_asset_ids(block_dict) + assets = bigchain.get_assets(asset_ids) + block_dict = cls.couple_assets(block_dict, assets) + return cls.from_dict(block_dict) + + def decouple_assets(self): + # TODO: Write documentation + block_dict = deepcopy(self.to_dict()) + assets = [] + for transaction in block_dict['block']['transactions']: + if transaction['operation'] in [Transaction.CREATE, + Transaction.GENESIS]: + asset = transaction.pop('asset') + asset.update({'id': transaction['id']}) + assets.append(asset) + + return (assets, block_dict) + + @staticmethod + def couple_assets(block_dict, assets): + # TODO: Write docstring + # create a dict with {'': asset} + assets = {asset.pop('id'): asset for asset in assets} + # add the assets to the block transactions + for transaction in block_dict['block']['transactions']: + if transaction['operation'] in [Transaction.CREATE, + Transaction.GENESIS]: + transaction.update({'asset': assets.get(transaction['id'], + None)}) + return block_dict + + @staticmethod + def get_asset_ids(block_dict): + # TODO: Write docstring + asset_ids = [] + for transaction in block_dict['block']['transactions']: + if transaction['operation'] in [Transaction.CREATE, + Transaction.GENESIS]: + asset_ids.append(transaction['id']) + + return asset_ids + def to_str(self): return serialize(self.to_dict()) diff --git a/bigchaindb/pipelines/vote.py b/bigchaindb/pipelines/vote.py index 9664c520..e90ce6c4 100644 --- a/bigchaindb/pipelines/vote.py +++ b/bigchaindb/pipelines/vote.py @@ -50,7 +50,7 @@ class Vote: def validate_block(self, block): if not self.bigchain.has_previous_vote(block['id']): try: - block = Block.from_dict(block) + block = Block.from_db(self.bigchain, block) except (exceptions.InvalidHash): # XXX: if a block is invalid we should skip the `validate_tx` # step, but since we are in a pipeline we cannot just jump to diff --git a/tests/backend/mongodb/test_queries.py b/tests/backend/mongodb/test_queries.py index bd7e75f1..1363d9d7 100644 --- a/tests/backend/mongodb/test_queries.py +++ b/tests/backend/mongodb/test_queries.py @@ -269,7 +269,7 @@ def test_write_block(signed_create_tx): # create and write block block = Block(transactions=[signed_create_tx]) - query.write_block(conn, block) + query.write_block(conn, block.to_dict()) block_db = conn.db.bigchain.find_one({'id': block.id}, {'_id': False}) @@ -347,17 +347,18 @@ def test_get_genesis_block(genesis_block): from bigchaindb.backend import connect, query conn = connect() - assert query.get_genesis_block(conn) == genesis_block.to_dict() + assets, genesis_block_dict = genesis_block.decouple_assets() + assert query.get_genesis_block(conn) == genesis_block_dict -def test_get_last_voted_block(genesis_block, signed_create_tx, b): +def test_get_last_voted_block_id(genesis_block, signed_create_tx, b): from bigchaindb.backend import connect, query from bigchaindb.models import Block from bigchaindb.common.exceptions import CyclicBlockchainError conn = connect() # check that the last voted block is the genesis block - assert query.get_last_voted_block(conn, b.me) == genesis_block.to_dict() + assert query.get_last_voted_block_id(conn, b.me) == genesis_block.id # create and insert a new vote and block block = Block(transactions=[signed_create_tx]) @@ -365,7 +366,7 @@ def test_get_last_voted_block(genesis_block, signed_create_tx, b): vote = b.vote(block.id, genesis_block.id, True) conn.db.votes.insert_one(vote) - assert query.get_last_voted_block(conn, b.me) == block.to_dict() + assert query.get_last_voted_block_id(conn, b.me) == block.id # force a bad chain vote.pop('_id') @@ -374,7 +375,7 @@ def test_get_last_voted_block(genesis_block, signed_create_tx, b): conn.db.votes.insert_one(vote) with pytest.raises(CyclicBlockchainError): - query.get_last_voted_block(conn, b.me) + query.get_last_voted_block_id(conn, b.me) def test_get_unvoted_blocks(signed_create_tx): diff --git a/tests/backend/mongodb/test_schema.py b/tests/backend/mongodb/test_schema.py index 71eac7ff..e3b320bd 100644 --- a/tests/backend/mongodb/test_schema.py +++ b/tests/backend/mongodb/test_schema.py @@ -18,7 +18,8 @@ def test_init_creates_db_tables_and_indexes(): init_database() collection_names = conn.conn[dbname].collection_names() - assert sorted(collection_names) == ['backlog', 'bigchain', 'votes'] + assert sorted(collection_names) == ['assets', 'backlog', 'bigchain', + 'votes'] indexes = conn.conn[dbname]['bigchain'].index_information().keys() assert sorted(indexes) == ['_id_', 'asset_id', 'block_timestamp', 'inputs', @@ -31,6 +32,9 @@ def test_init_creates_db_tables_and_indexes(): indexes = conn.conn[dbname]['votes'].index_information().keys() assert sorted(indexes) == ['_id_', 'block_and_voter'] + indexes = conn.conn[dbname]['assets'].index_information().keys() + assert sorted(indexes) == ['_id_', 'asset_id'] + def test_init_database_fails_if_db_exists(): import bigchaindb @@ -62,7 +66,8 @@ def test_create_tables(): schema.create_tables(conn, dbname) collection_names = conn.conn[dbname].collection_names() - assert sorted(collection_names) == ['backlog', 'bigchain', 'votes'] + assert sorted(collection_names) == ['assets', 'backlog', 'bigchain', + 'votes'] def test_create_secondary_indexes(): diff --git a/tests/backend/test_generics.py b/tests/backend/test_generics.py index 57a644ee..6a1e9447 100644 --- a/tests/backend/test_generics.py +++ b/tests/backend/test_generics.py @@ -30,12 +30,14 @@ def test_schema(schema_func_name, args_qty): ('write_block', 1), ('get_block', 1), ('write_vote', 1), - ('get_last_voted_block', 1), + ('get_last_voted_block_id', 1), ('get_unvoted_blocks', 1), ('get_spent', 2), ('get_votes_by_block_id_and_voter', 2), ('update_transaction', 2), ('get_transaction_from_block', 2), + ('write_assets', 1), + ('get_assets', 1), )) def test_query(query_func_name, args_qty): from bigchaindb.backend import query diff --git a/tests/db/test_bigchain_api.py b/tests/db/test_bigchain_api.py index 3f05385c..5960f171 100644 --- a/tests/db/test_bigchain_api.py +++ b/tests/db/test_bigchain_api.py @@ -383,7 +383,7 @@ class TestBigchainApi(object): from bigchaindb.backend import query genesis = query.get_genesis_block(b.connection) - genesis = Block.from_dict(genesis) + genesis = Block.from_db(b, genesis) gb = b.get_last_voted_block() assert gb == genesis assert b.validate_block(gb) == gb diff --git a/tests/pipelines/test_vote.py b/tests/pipelines/test_vote.py index 7df7ca11..29523035 100644 --- a/tests/pipelines/test_vote.py +++ b/tests/pipelines/test_vote.py @@ -20,6 +20,15 @@ def dummy_block(b): return block +def decouple_assets(b, block): + # the block comming from the database does not contain the assets + # so we need to pass the block without the assets and store the assets + # so that the voting pipeline can reconstruct it + assets, block_dict = block.decouple_assets() + b.write_assets(assets) + return block_dict + + DUMMY_SHA3 = '0123456789abcdef' * 4 @@ -79,9 +88,10 @@ def test_vote_validate_block(b): tx = dummy_tx(b) block = b.create_block([tx]) + block_dict = decouple_assets(b, block) vote_obj = vote.Vote() - validation = vote_obj.validate_block(block.to_dict()) + validation = vote_obj.validate_block(block_dict) assert validation[0] == block.id for tx1, tx2 in zip(validation[1], block.transactions): assert tx1 == tx2 @@ -220,8 +230,9 @@ def test_valid_block_voting_multiprocessing(b, genesis_block, monkeypatch): vote_pipeline.setup(indata=inpipe, outdata=outpipe) block = dummy_block(b) + block_dict = decouple_assets(b, block) - inpipe.put(block.to_dict()) + inpipe.put(block_dict) vote_pipeline.start() vote_out = outpipe.get() vote_pipeline.terminate() @@ -257,6 +268,7 @@ def test_valid_block_voting_with_create_transaction(b, monkeypatch.setattr('time.time', lambda: 1111111111) block = b.create_block([tx]) + block_dict = decouple_assets(b, block) inpipe = Pipe() outpipe = Pipe() @@ -264,7 +276,7 @@ def test_valid_block_voting_with_create_transaction(b, vote_pipeline = vote.create_pipeline() vote_pipeline.setup(indata=inpipe, outdata=outpipe) - inpipe.put(block.to_dict()) + inpipe.put(block_dict) vote_pipeline.start() vote_out = outpipe.get() vote_pipeline.terminate() From 8c0dbeb28199ffab64619b281f04fb94e39fd05e Mon Sep 17 00:00:00 2001 From: Rodolphe Marques Date: Wed, 10 May 2017 17:55:43 +0200 Subject: [PATCH 09/21] Added asset decoupling support for rethinkdb Updated schema. Created queries for assets table. Fixed tests. --- bigchaindb/backend/rethinkdb/query.py | 25 +++++++++++++++++++------ bigchaindb/backend/rethinkdb/schema.py | 2 +- bigchaindb/models.py | 3 ++- tests/backend/rethinkdb/test_schema.py | 3 ++- 4 files changed, 24 insertions(+), 9 deletions(-) diff --git a/bigchaindb/backend/rethinkdb/query.py b/bigchaindb/backend/rethinkdb/query.py index be20442a..417bcd93 100644 --- a/bigchaindb/backend/rethinkdb/query.py +++ b/bigchaindb/backend/rethinkdb/query.py @@ -6,6 +6,7 @@ import rethinkdb as r from bigchaindb import backend, utils from bigchaindb.common import exceptions from bigchaindb.common.transaction import Transaction +from bigchaindb.common.utils import serialize from bigchaindb.backend.utils import module_dispatch_registrar from bigchaindb.backend.rethinkdb.connection import RethinkDBConnection @@ -147,10 +148,10 @@ def get_votes_by_block_id_and_voter(connection, block_id, node_pubkey): @register_query(RethinkDBConnection) -def write_block(connection, block): +def write_block(connection, block_dict): return connection.run( r.table('bigchain') - .insert(r.json(block.to_str()), durability=WRITE_DURABILITY)) + .insert(r.json(serialize(block_dict)), durability=WRITE_DURABILITY)) @register_query(RethinkDBConnection) @@ -158,6 +159,20 @@ def get_block(connection, block_id): return connection.run(r.table('bigchain').get(block_id)) +@register_query(RethinkDBConnection) +def write_assets(connection, assets): + return connection.run( + r.table('assets') + .insert(assets, durability=WRITE_DURABILITY)) + + +@register_query(RethinkDBConnection) +def get_assets(connection, asset_ids): + return connection.run( + r.table('assets', read_mode=READ_MODE) + .get_all(*asset_ids)) + + @register_query(RethinkDBConnection) def count_blocks(connection): return connection.run( @@ -203,7 +218,7 @@ def get_last_voted_block_id(connection, node_pubkey): except r.ReqlNonExistenceError: # return last vote if last vote exists else return Genesis block - return get_genesis_block(connection) + return get_genesis_block(connection)['id'] # Now the fun starts. Since the resolution of timestamp is a second, # we might have more than one vote per timestamp. If this is the case @@ -235,9 +250,7 @@ def get_last_voted_block_id(connection, node_pubkey): except KeyError: break - return connection.run( - r.table('bigchain', read_mode=READ_MODE) - .get(last_block_id)) + return last_block_id @register_query(RethinkDBConnection) diff --git a/bigchaindb/backend/rethinkdb/schema.py b/bigchaindb/backend/rethinkdb/schema.py index 997ec5fc..8f0f6b9c 100644 --- a/bigchaindb/backend/rethinkdb/schema.py +++ b/bigchaindb/backend/rethinkdb/schema.py @@ -23,7 +23,7 @@ def create_database(connection, dbname): @register_schema(RethinkDBConnection) def create_tables(connection, dbname): - for table_name in ['bigchain', 'backlog', 'votes']: + for table_name in ['bigchain', 'backlog', 'votes', 'assets']: logger.info('Create `%s` table.', table_name) connection.run(r.db(dbname).table_create(table_name)) diff --git a/bigchaindb/models.py b/bigchaindb/models.py index 5de56aaf..81ca898c 100644 --- a/bigchaindb/models.py +++ b/bigchaindb/models.py @@ -90,7 +90,8 @@ class Transaction(Transaction): def from_db(cls, bigchain, tx_dict): # TODO: write docstring if tx_dict['operation'] in [Transaction.CREATE, Transaction.CREATE]: - asset = bigchain.get_assets([tx_dict['id']])[0] + # TODO: Maybe replace this call to a call to get_asset_by_id + asset = list(bigchain.get_assets([tx_dict['id']]))[0] asset.pop('id') tx_dict.update({'asset': asset}) diff --git a/tests/backend/rethinkdb/test_schema.py b/tests/backend/rethinkdb/test_schema.py index e19dfdc2..6f77b672 100644 --- a/tests/backend/rethinkdb/test_schema.py +++ b/tests/backend/rethinkdb/test_schema.py @@ -63,7 +63,8 @@ def test_create_tables(): assert conn.run(r.db(dbname).table_list().contains('bigchain')) is True assert conn.run(r.db(dbname).table_list().contains('backlog')) is True assert conn.run(r.db(dbname).table_list().contains('votes')) is True - assert len(conn.run(r.db(dbname).table_list())) == 3 + assert conn.run(r.db(dbname).table_list().contains('assets')) is True + assert len(conn.run(r.db(dbname).table_list())) == 4 @pytest.mark.bdb From aacba571f85f317882547c97b8c132281cfae2c0 Mon Sep 17 00:00:00 2001 From: Rodolphe Marques Date: Thu, 11 May 2017 11:38:07 +0200 Subject: [PATCH 10/21] Added docstrings and tests to new methods --- bigchaindb/backend/mongodb/schema.py | 4 +- bigchaindb/backend/query.py | 21 +++- bigchaindb/models.py | 51 +++++++++- tests/backend/mongodb/test_queries.py | 46 +++++++++ tests/test_block_model.py | 133 ++++++++++++++++++++++++++ 5 files changed, 247 insertions(+), 8 deletions(-) diff --git a/bigchaindb/backend/mongodb/schema.py b/bigchaindb/backend/mongodb/schema.py index 527476f0..12b873e0 100644 --- a/bigchaindb/backend/mongodb/schema.py +++ b/bigchaindb/backend/mongodb/schema.py @@ -108,8 +108,8 @@ def create_votes_secondary_index(conn, dbname): def create_assets_secondary_index(conn, dbname): logger.info('Create `assets` secondary index.') - # is the first index redundant then? - # compound index to order votes by block id and node + # unique index on the id of the asset. + # the id is the txid of the transaction that created the asset conn.conn[dbname]['assets'].create_index('id', name='asset_id', unique=True) diff --git a/bigchaindb/backend/query.py b/bigchaindb/backend/query.py index 8f1325ef..5c37647c 100644 --- a/bigchaindb/backend/query.py +++ b/bigchaindb/backend/query.py @@ -213,13 +213,28 @@ def get_block(connection, block_id): @singledispatch def write_assets(connection, assets): - # TODO: write docstring + """Write a list of assets to the assets table. + + Args: + assets (list): a list of assets to write. + + Returns: + The database response. + """ raise NotImplementedError @singledispatch -def get_assets(connection, assets): - # TODO: write docstring +def get_assets(connection, asset_ids): + """Get a list of assets from the assets table. + + Args: + asset_ids (list): a of list of ids for the assets to be retrieved from + the database. + + Returns: + assets (list): the list of returned assets. + """ raise NotImplementedError diff --git a/bigchaindb/models.py b/bigchaindb/models.py index 81ca898c..53266930 100644 --- a/bigchaindb/models.py +++ b/bigchaindb/models.py @@ -315,13 +315,33 @@ class Block(object): @classmethod def from_db(cls, bigchain, block_dict): + """ + Helper method that reconstructs a block_dict that was returned from + the database. If checks what asset_ids to retrieve, retrieves the + assets from the assets table and reconstructs the block. + + Args: + bigchain (:class:`~bigchaindb.Bigchain`): An instance of Bigchain + used to perform database queries. + + Returns: + :class:`~Block` + + """ asset_ids = cls.get_asset_ids(block_dict) assets = bigchain.get_assets(asset_ids) block_dict = cls.couple_assets(block_dict, assets) return cls.from_dict(block_dict) def decouple_assets(self): - # TODO: Write documentation + """ + Extracts the assets from the `CREATE` transactions in the block. + + Returns: + tuple: (assets, block) with the assets being a list of dicts and + the block being the dict of the block with no assets in the CREATE + transactions. + """ block_dict = deepcopy(self.to_dict()) assets = [] for transaction in block_dict['block']['transactions']: @@ -335,7 +355,20 @@ class Block(object): @staticmethod def couple_assets(block_dict, assets): - # TODO: Write docstring + """ + Give a block_dict with not assets (as returned from a database call) + and a list of assets, reconstruct the original block by puting the + assets back into the `CREATE` transactions in the block. + + Args: + block_dict (:obj:`dict`): The block dict as returned from a + database call. + assets (:obj:`list` of :obj:`dict`): A list of assets returned from + a database call. + + Returns: + dict: The dict of the reconstructed block. + """ # create a dict with {'': asset} assets = {asset.pop('id'): asset for asset in assets} # add the assets to the block transactions @@ -348,7 +381,19 @@ class Block(object): @staticmethod def get_asset_ids(block_dict): - # TODO: Write docstring + """ + Given a block_dict return all the asset_ids for that block (the txid + of CREATE transactions). Usefull to know which assets to retrieve + from the database to reconstruct the block. + + Args: + block_dict (:obj:`dict`): The block dict as returned from a + database call. + + Returns: + list: The list of asset_ids in the block. + + """ asset_ids = [] for transaction in block_dict['block']['transactions']: if transaction['operation'] in [Transaction.CREATE, diff --git a/tests/backend/mongodb/test_queries.py b/tests/backend/mongodb/test_queries.py index 1363d9d7..c43c5fa4 100644 --- a/tests/backend/mongodb/test_queries.py +++ b/tests/backend/mongodb/test_queries.py @@ -1,4 +1,7 @@ +from copy import deepcopy + import pytest +import pymongo pytestmark = pytest.mark.bdb @@ -418,3 +421,46 @@ def test_get_txids_filtered(signed_create_tx, signed_transfer_tx): # Test get by asset and TRANSFER txids = set(query.get_txids_filtered(conn, asset_id, Transaction.TRANSFER)) assert txids == {signed_transfer_tx.id} + + +def test_write_assets(): + from bigchaindb.backend import connect, query + conn = connect() + + assets = [ + {'id': 1, 'data': '1'}, + {'id': 2, 'data': '2'}, + {'id': 3, 'data': '3'}, + # Duplicated id. Should not be written to the database + {'id': 1, 'data': '1'}, + ] + + # write the assets + query.write_assets(conn, deepcopy(assets)) + + # check that 3 assets were written to the database + cursor = conn.db.assets.find({}, projection={'_id': False})\ + .sort('id', pymongo.ASCENDING) + + assert cursor.count() == 3 + assert list(cursor) == assets[:-1] + + +def test_get_assets(): + from bigchaindb.backend import connect, query + conn = connect() + + assets = [ + {'id': 1, 'data': '1'}, + {'id': 2, 'data': '2'}, + {'id': 3, 'data': '3'}, + ] + + # write the assets + conn.db.assets.insert_many(deepcopy(assets), ordered=False) + + # read only 2 assets + cursor = query.get_assets(conn, [1, 3]) + + assert cursor.count() == 2 + assert list(cursor.sort('id', pymongo.ASCENDING)) == assets[::2] diff --git a/tests/test_block_model.py b/tests/test_block_model.py index 6e559cb2..981a64d7 100644 --- a/tests/test_block_model.py +++ b/tests/test_block_model.py @@ -153,3 +153,136 @@ class TestBlockModel(object): block = b.create_block([tx, tx]) with raises(DuplicateTransaction): block._validate_block(b) + + def test_decouple_assets(self, b): + from bigchaindb.models import Block, Transaction + + assets = [ + {'msg': '1'}, + {'msg': '2'}, + {'msg': '3'}, + ] + + txs = [] + # create 3 assets + for asset in assets: + tx = Transaction.create([b.me], [([b.me], 1)], asset=asset) + txs.append(tx) + + # create a `TRANSFER` transaction. + # the asset in `TRANSFER` transactions is not extracted + tx = Transaction.transfer(txs[0].to_inputs(), [([b.me], 1)], + asset_id=txs[0].id) + txs.append(tx) + + # create the block + block = Block(txs) + # decouple assets + assets_from_block, block_dict = block.decouple_assets() + + assert len(assets_from_block) == 3 + for i in range(3): + assert assets_from_block[i]['data'] == assets[i] + assert assets_from_block[i]['id'] == txs[i].id + + # check the `TRANSFER` transaction was not changed + assert block.transactions[3].to_dict() == \ + block_dict['block']['transactions'][3] + + def test_couple_assets(self, b): + from bigchaindb.models import Block, Transaction + + assets = [ + {'msg': '1'}, + {'msg': '2'}, + {'msg': '3'}, + ] + + txs = [] + # create 3 assets + for asset in assets: + tx = Transaction.create([b.me], [([b.me], 1)], asset=asset) + txs.append(tx) + + # create a `TRANSFER` transaction. + # the asset in `TRANSFER` transactions is not extracted + tx = Transaction.transfer(txs[0].to_inputs(), [([b.me], 1)], + asset_id=txs[0].id) + txs.append(tx) + + # create the block + block = Block(txs) + # decouple assets + assets_from_block, block_dict = block.decouple_assets() + + # reconstruct the block + block_dict_reconstructed = Block.couple_assets(block_dict, + assets_from_block) + + # check that the reconstructed block is the as the original block + assert block == Block.from_dict(block_dict_reconstructed) + + def test_get_asset_ids(self, b): + from bigchaindb.models import Block, Transaction + + assets = [ + {'msg': '1'}, + {'msg': '2'}, + {'msg': '3'}, + ] + + txs = [] + # create 3 assets + for asset in assets: + tx = Transaction.create([b.me], [([b.me], 1)], asset=asset) + txs.append(tx) + + # create a `TRANSFER` transaction. + # the asset in `TRANSFER` transactions is not extracted + tx = Transaction.transfer(txs[0].to_inputs(), [([b.me], 1)], + asset_id=txs[0].id) + txs.append(tx) + + # create the block + block = Block(txs) + # decouple assets + assets_from_block, block_dict = block.decouple_assets() + + # get the asset_ids and check that they are the same as the `CREATE` + # transactions + asset_ids = Block.get_asset_ids(block_dict) + assert asset_ids == [tx.id for tx in txs[:-1]] + + def test_from_db(self, b): + from bigchaindb.models import Block, Transaction + + assets = [ + {'msg': '1'}, + {'msg': '2'}, + {'msg': '3'}, + ] + + txs = [] + # create 3 assets + for asset in assets: + tx = Transaction.create([b.me], [([b.me], 1)], asset=asset) + txs.append(tx) + + # create a `TRANSFER` transaction. + # the asset in `TRANSFER` transactions is not extracted + tx = Transaction.transfer(txs[0].to_inputs(), [([b.me], 1)], + asset_id=txs[0].id) + txs.append(tx) + + # create the block + block = Block(txs) + # decouple assets + assets_from_block, block_dict = block.decouple_assets() + + # write the assets and block separatedly + b.write_assets(assets_from_block) + b.write_block(block) + + # check the reconstructed block is the same as the original block + block_from_db = Block.from_db(b, block_dict) + assert block == block_from_db From eb94181e1bc0ed904b25a3a1c626c5f2d863993b Mon Sep 17 00:00:00 2001 From: Rodolphe Marques Date: Thu, 11 May 2017 12:00:01 +0200 Subject: [PATCH 11/21] Fixed tests. Updated fixtures to flush the assets table after each test. --- tests/test_block_model.py | 2 ++ tests/utils.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/tests/test_block_model.py b/tests/test_block_model.py index 981a64d7..f2b307de 100644 --- a/tests/test_block_model.py +++ b/tests/test_block_model.py @@ -1,3 +1,4 @@ +import pytest from pytest import raises @@ -253,6 +254,7 @@ class TestBlockModel(object): asset_ids = Block.get_asset_ids(block_dict) assert asset_ids == [tx.id for tx in txs[:-1]] + @pytest.mark.bdb def test_from_db(self, b): from bigchaindb.models import Block, Transaction diff --git a/tests/utils.py b/tests/utils.py index 53ffd86b..9af72a5d 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -32,6 +32,7 @@ def flush_rethink_db(connection, dbname): connection.run(r.db(dbname).table('bigchain').delete()) connection.run(r.db(dbname).table('backlog').delete()) connection.run(r.db(dbname).table('votes').delete()) + connection.run(r.db(dbname).table('assets').delete()) except r.ReqlOpFailedError: pass @@ -41,6 +42,7 @@ def flush_mongo_db(connection, dbname): connection.conn[dbname].bigchain.delete_many({}) connection.conn[dbname].backlog.delete_many({}) connection.conn[dbname].votes.delete_many({}) + connection.conn[dbname].assets.delete_many({}) @singledispatch From ce414e46f3da825143c7df3c72295a68f61f073d Mon Sep 17 00:00:00 2001 From: Rodolphe Marques Date: Thu, 11 May 2017 12:29:08 +0200 Subject: [PATCH 12/21] Added missing docstrings. Fixed typos. --- bigchaindb/backend/query.py | 4 ++-- bigchaindb/core.py | 18 +++++++++++++++++- bigchaindb/models.py | 22 +++++++++++++++++++--- tests/test_block_model.py | 2 +- 4 files changed, 39 insertions(+), 7 deletions(-) diff --git a/bigchaindb/backend/query.py b/bigchaindb/backend/query.py index 5c37647c..8245fb3d 100644 --- a/bigchaindb/backend/query.py +++ b/bigchaindb/backend/query.py @@ -293,8 +293,8 @@ def get_last_voted_block_id(connection, node_pubkey): node_pubkey (str): base58 encoded public key. Returns: - The last block the node has voted on. If the node didn't cast - any vote then the genesis block is returned. + The last block id the node has voted on. If the node didn't cast + any vote then the genesis block id is returned. """ raise NotImplementedError diff --git a/bigchaindb/core.py b/bigchaindb/core.py index c6ff5608..1cd21222 100644 --- a/bigchaindb/core.py +++ b/bigchaindb/core.py @@ -641,8 +641,24 @@ class Bigchain(object): return self.block_election(block)['status'] def get_assets(self, asset_ids): - # TODO: write docstrings + """ + Return a list of assets that match the asset_ids + + Args: + asset_ids (:obj:`list` of :obj:`str`): A list of asset_ids to + retrieve from the database. + + Returs: + list: The list of assets returned from the database. + """ return backend.query.get_assets(self.connection, asset_ids) def write_assets(self, assets): + """ + Writes a list of assets into the database. + + Args: + assets (:obj:`list` of :obj:`dict`): A list of assets to write to + the database. + """ return backend.query.write_assets(self.connection, assets) diff --git a/bigchaindb/models.py b/bigchaindb/models.py index 53266930..c77c3338 100644 --- a/bigchaindb/models.py +++ b/bigchaindb/models.py @@ -88,8 +88,22 @@ class Transaction(Transaction): @classmethod def from_db(cls, bigchain, tx_dict): - # TODO: write docstring - if tx_dict['operation'] in [Transaction.CREATE, Transaction.CREATE]: + """ + Helper method that reconstructs a transaction dict that was returned + from the database. It checks what asset_id to retrieve, retrieves the + asset from the asset table and reconstructs the transaction. + + Args: + bigchain (:class:`~bigchaindb.Bigchain`): An instance of Bigchain + used to perform database queries. + tx_dict (:obj:`dict`): The transaction dict as returned from the + database. + + Returns: + :class:`~Transaction` + + """ + if tx_dict['operation'] in [Transaction.CREATE, Transaction.GENESIS]: # TODO: Maybe replace this call to a call to get_asset_by_id asset = list(bigchain.get_assets([tx_dict['id']]))[0] asset.pop('id') @@ -317,12 +331,14 @@ class Block(object): def from_db(cls, bigchain, block_dict): """ Helper method that reconstructs a block_dict that was returned from - the database. If checks what asset_ids to retrieve, retrieves the + the database. It checks what asset_ids to retrieve, retrieves the assets from the assets table and reconstructs the block. Args: bigchain (:class:`~bigchaindb.Bigchain`): An instance of Bigchain used to perform database queries. + block_dict(:obj:`dict`): The block dict as returned from the + database. Returns: :class:`~Block` diff --git a/tests/test_block_model.py b/tests/test_block_model.py index f2b307de..0824c40f 100644 --- a/tests/test_block_model.py +++ b/tests/test_block_model.py @@ -281,7 +281,7 @@ class TestBlockModel(object): # decouple assets assets_from_block, block_dict = block.decouple_assets() - # write the assets and block separatedly + # write the assets and block separately b.write_assets(assets_from_block) b.write_block(block) From ccaae91601d174dbede2102458410597ec14a8ed Mon Sep 17 00:00:00 2001 From: Rodolphe Marques Date: Fri, 12 May 2017 15:22:11 +0200 Subject: [PATCH 13/21] Added full text search support for mongodb. - Create widlcard text index for the assets collection. - Created backend query to to text search on assets collection. - Added and updated tests. --- bigchaindb/backend/mongodb/query.py | 25 ++++++++ bigchaindb/backend/mongodb/schema.py | 5 +- bigchaindb/backend/query.py | 8 +++ tests/backend/mongodb/test_queries.py | 84 +++++++++++++++++++++++++++ tests/backend/mongodb/test_schema.py | 2 +- 5 files changed, 122 insertions(+), 2 deletions(-) diff --git a/bigchaindb/backend/mongodb/query.py b/bigchaindb/backend/mongodb/query.py index 39d99d4a..3d989a3d 100644 --- a/bigchaindb/backend/mongodb/query.py +++ b/bigchaindb/backend/mongodb/query.py @@ -327,3 +327,28 @@ def get_unvoted_blocks(conn, node_pubkey): 'votes': False, '_id': False }} ])) + + +@register_query(MongoDBConnection) +def text_search(conn, search, language='english', case_sensitive=False, + diacritic_sensitive=False, text_score=False, limit=0): + cursor = conn.run( + conn.collection('assets') + .find({'$text': { + '$search': search, + '$language': language, + '$caseSensitive': case_sensitive, + '$diacriticSensitive': diacritic_sensitive}}, + {'score': {'$meta': 'textScore'}, '_id': False}) + .sort([('score', {'$meta': 'textScore'})]) + .limit(limit)) + + if text_score: + return cursor + else: + return (_remove_text_score(asset) for asset in cursor) + + +def _remove_text_score(asset): + asset.pop('score', None) + return asset diff --git a/bigchaindb/backend/mongodb/schema.py b/bigchaindb/backend/mongodb/schema.py index 12b873e0..6c54bfd8 100644 --- a/bigchaindb/backend/mongodb/schema.py +++ b/bigchaindb/backend/mongodb/schema.py @@ -2,7 +2,7 @@ import logging -from pymongo import ASCENDING, DESCENDING +from pymongo import ASCENDING, DESCENDING, TEXT from bigchaindb import backend from bigchaindb.common import exceptions @@ -113,3 +113,6 @@ def create_assets_secondary_index(conn, dbname): conn.conn[dbname]['assets'].create_index('id', name='asset_id', unique=True) + + # full text search index + conn.conn[dbname]['assets'].create_index([('$**', TEXT)], name='text') diff --git a/bigchaindb/backend/query.py b/bigchaindb/backend/query.py index 8245fb3d..705b0306 100644 --- a/bigchaindb/backend/query.py +++ b/bigchaindb/backend/query.py @@ -325,3 +325,11 @@ def get_txids_filtered(connection, asset_id, operation=None): """ raise NotImplementedError + + +@singledispatch +def text_search(conn, search, language='english', case_sensitive=False, + diacritic_sensitive=False, text_score=False, limit=0): + # TODO: docstring + + raise NotImplementedError diff --git a/tests/backend/mongodb/test_queries.py b/tests/backend/mongodb/test_queries.py index c43c5fa4..9b2ad1cf 100644 --- a/tests/backend/mongodb/test_queries.py +++ b/tests/backend/mongodb/test_queries.py @@ -464,3 +464,87 @@ def test_get_assets(): assert cursor.count() == 2 assert list(cursor.sort('id', pymongo.ASCENDING)) == assets[::2] + + +def test_text_search(): + from bigchaindb.backend import connect, query + conn = connect() + + # Example data and tests cases taken from the mongodb documentation + # https://docs.mongodb.com/manual/reference/operator/query/text/ + assets = [ + {'id': 1, 'subject': 'coffee', 'author': 'xyz', 'views': 50}, + {'id': 2, 'subject': 'Coffee Shopping', 'author': 'efg', 'views': 5}, + {'id': 3, 'subject': 'Baking a cake', 'author': 'abc', 'views': 90}, + {'id': 4, 'subject': 'baking', 'author': 'xyz', 'views': 100}, + {'id': 5, 'subject': 'Café Con Leche', 'author': 'abc', 'views': 200}, + {'id': 6, 'subject': 'Сырники', 'author': 'jkl', 'views': 80}, + {'id': 7, 'subject': 'coffee and cream', 'author': 'efg', 'views': 10}, + {'id': 8, 'subject': 'Cafe con Leche', 'author': 'xyz', 'views': 10} + ] + + # insert the assets + conn.db.assets.insert_many(deepcopy(assets), ordered=False) + + # test search single word + assert list(query.text_search(conn, 'coffee')) == [ + {'id': 1, 'subject': 'coffee', 'author': 'xyz', 'views': 50}, + {'id': 2, 'subject': 'Coffee Shopping', 'author': 'efg', 'views': 5}, + {'id': 7, 'subject': 'coffee and cream', 'author': 'efg', 'views': 10}, + ] + + # match any of the search terms + assert list(query.text_search(conn, 'bake coffee cake')) == [ + {'author': 'abc', 'id': 3, 'subject': 'Baking a cake', 'views': 90}, + {'author': 'xyz', 'id': 1, 'subject': 'coffee', 'views': 50}, + {'author': 'xyz', 'id': 4, 'subject': 'baking', 'views': 100}, + {'author': 'efg', 'id': 2, 'subject': 'Coffee Shopping', 'views': 5}, + {'author': 'efg', 'id': 7, 'subject': 'coffee and cream', 'views': 10} + ] + + # search for a phrase + assert list(query.text_search(conn, '\"coffee shop\"')) == [ + {'id': 2, 'subject': 'Coffee Shopping', 'author': 'efg', 'views': 5}, + ] + + # exclude documents that contain a term + assert list(query.text_search(conn, 'coffee -shop')) == [ + {'id': 1, 'subject': 'coffee', 'author': 'xyz', 'views': 50}, + {'id': 7, 'subject': 'coffee and cream', 'author': 'efg', 'views': 10}, + ] + + # search different language + assert list(query.text_search(conn, 'leche', language='es')) == [ + {'id': 5, 'subject': 'Café Con Leche', 'author': 'abc', 'views': 200}, + {'id': 8, 'subject': 'Cafe con Leche', 'author': 'xyz', 'views': 10} + ] + + # case and diacritic insensitive search + assert list(query.text_search(conn, 'сы́рники CAFÉS')) == [ + {'id': 6, 'subject': 'Сырники', 'author': 'jkl', 'views': 80}, + {'id': 5, 'subject': 'Café Con Leche', 'author': 'abc', 'views': 200}, + {'id': 8, 'subject': 'Cafe con Leche', 'author': 'xyz', 'views': 10} + ] + + # case sensitive search + assert list(query.text_search(conn, 'Coffee', case_sensitive=True)) == [ + {'id': 2, 'subject': 'Coffee Shopping', 'author': 'efg', 'views': 5}, + ] + + # diacritic sensitive search + assert list(query.text_search(conn, 'CAFÉ', diacritic_sensitive=True)) == [ + {'id': 5, 'subject': 'Café Con Leche', 'author': 'abc', 'views': 200}, + ] + + # return text score + assert list(query.text_search(conn, 'coffee', text_score=True)) == [ + {'id': 1, 'subject': 'coffee', 'author': 'xyz', 'views': 50, 'score': 1.0}, + {'id': 2, 'subject': 'Coffee Shopping', 'author': 'efg', 'views': 5, 'score': 0.75}, + {'id': 7, 'subject': 'coffee and cream', 'author': 'efg', 'views': 10, 'score': 0.75}, + ] + + # limit search result + assert list(query.text_search(conn, 'coffee', limit=2)) == [ + {'id': 1, 'subject': 'coffee', 'author': 'xyz', 'views': 50}, + {'id': 2, 'subject': 'Coffee Shopping', 'author': 'efg', 'views': 5}, + ] diff --git a/tests/backend/mongodb/test_schema.py b/tests/backend/mongodb/test_schema.py index e3b320bd..e11dbfe8 100644 --- a/tests/backend/mongodb/test_schema.py +++ b/tests/backend/mongodb/test_schema.py @@ -33,7 +33,7 @@ def test_init_creates_db_tables_and_indexes(): assert sorted(indexes) == ['_id_', 'block_and_voter'] indexes = conn.conn[dbname]['assets'].index_information().keys() - assert sorted(indexes) == ['_id_', 'asset_id'] + assert sorted(indexes) == ['_id_', 'asset_id', 'text'] def test_init_database_fails_if_db_exists(): From 69cafee156d088ef4de0fca0fc4dfbc53ecb3678 Mon Sep 17 00:00:00 2001 From: Troy McConaghy Date: Wed, 17 May 2017 15:20:20 +0200 Subject: [PATCH 14/21] Add steps to Release_Process.md, to update Docker image version for k8s --- Release_Process.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Release_Process.md b/Release_Process.md index e4a988a1..be4c448a 100644 --- a/Release_Process.md +++ b/Release_Process.md @@ -10,6 +10,7 @@ that [major version 0.x does not export a stable API](http://semver.org/#spec-it A minor release is preceeded by a feature freeze and created from the 'master' branch. This is a summary of the steps we go through to release a new minor version of BigchainDB Server. 1. Update the `CHANGELOG.md` file in master +1. In `k8s/bigchaindb/bigchaindb-dep.yaml`, find the line of the form `image: bigchaindb/bigchaindb:0.8.1` and change the version number to the new version number, e.g. `0.9.0`. (This is the Docker image that Kubernetes should pull from Docker Hub.) Commit that change to master 1. Create and checkout a new branch for the minor release, named after the minor version, without a preceeding 'v', e.g. `git checkout -b 0.9` (*not* 0.9.0, this new branch will be for e.g. 0.9.0, 0.9.1, 0.9.2, etc. each of which will be identified by a tagged commit) 1. In `bigchaindb/version.py`, update `__version__` and `__short_version__`, e.g. to `0.9` and `0.9.0` (with no `.dev` on the end) 1. Commit that change, and push the new branch to GitHub @@ -26,8 +27,11 @@ A patch release is similar to a minor release, but piggybacks on an existing min 1. Check out the minor release branch, e.g. `0.9` 1. Apply the changes you want, e.g. using `git cherry-pick`. 1. Update the `CHANGELOG.md` file -1. Increment the patch version in `bigchaindb/version.py`, e.g. "0.9.1" -1. Commit that change, and push the updated branch to GitHub +1. Increment the patch version in `bigchaindb/version.py`, e.g. `0.9.1` +1. Commit that change +1. In `k8s/bigchaindb/bigchaindb-dep.yaml`, find the line of the form `image: bigchaindb/bigchaindb:0.9.0` and change the version number to the new version number, e.g. `0.9.1`. (This is the Docker image that Kubernetes should pull from Docker Hub.) +1. Commit that change +1. Push the updated minor release branch to GitHub 1. Follow steps outlined in [Common Steps](#common-steps) 1. Cherry-pick the `CHANGELOG.md` update commit (made above) to the `master` branch From c6133c827d51edb5c10a8577b15b37ecf982b5d2 Mon Sep 17 00:00:00 2001 From: Troy McConaghy Date: Wed, 17 May 2017 14:29:37 +0200 Subject: [PATCH 15/21] docs: updated link to JS/nodejs driver --- docs/server/source/drivers-clients/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/server/source/drivers-clients/index.rst b/docs/server/source/drivers-clients/index.rst index 0bfde7ad..ef749d55 100644 --- a/docs/server/source/drivers-clients/index.rst +++ b/docs/server/source/drivers-clients/index.rst @@ -20,7 +20,7 @@ Community-Driven Libraries and Tools Some of these projects are a work in progress, but may still be useful. -* `Javascript transaction builder `_ +* `JavaScript / Node.js driver `_ * `Haskell transaction builder `_ * `Go driver `_ * `Java driver `_ From 6900e864584f89dd8f07132a45f801b16be4bc10 Mon Sep 17 00:00:00 2001 From: Rodolphe Marques Date: Wed, 24 May 2017 11:38:15 +0200 Subject: [PATCH 16/21] Filter out assets from invalid transactions - Created docstrings - Created tests - Raise an exception when trying to use text search with RethinkDB. --- bigchaindb/backend/mongodb/query.py | 2 +- bigchaindb/backend/query.py | 28 ++++++++-- bigchaindb/core.py | 11 ++++ tests/db/test_bigchain_api.py | 83 +++++++++++++++++++++++++++++ 4 files changed, 120 insertions(+), 4 deletions(-) diff --git a/bigchaindb/backend/mongodb/query.py b/bigchaindb/backend/mongodb/query.py index 3d989a3d..ffba23be 100644 --- a/bigchaindb/backend/mongodb/query.py +++ b/bigchaindb/backend/mongodb/query.py @@ -330,7 +330,7 @@ def get_unvoted_blocks(conn, node_pubkey): @register_query(MongoDBConnection) -def text_search(conn, search, language='english', case_sensitive=False, +def text_search(conn, search, *, language='english', case_sensitive=False, diacritic_sensitive=False, text_score=False, limit=0): cursor = conn.run( conn.collection('assets') diff --git a/bigchaindb/backend/query.py b/bigchaindb/backend/query.py index 705b0306..83179c2d 100644 --- a/bigchaindb/backend/query.py +++ b/bigchaindb/backend/query.py @@ -2,6 +2,8 @@ from functools import singledispatch +from bigchaindb.backend.exceptions import OperationError + @singledispatch def write_transaction(connection, signed_transaction): @@ -328,8 +330,28 @@ def get_txids_filtered(connection, asset_id, operation=None): @singledispatch -def text_search(conn, search, language='english', case_sensitive=False, +def text_search(conn, search, *, language='english', case_sensitive=False, diacritic_sensitive=False, text_score=False, limit=0): - # TODO: docstring + """Return all the assets that match the text search. - raise NotImplementedError + The results are sorted by text score. + + Args: + search (str): Text search string to query the text index + language (str, optional): The language for the search and the rules for + stemmer and tokenizer. If the language is `None` text search uses + simple tokenization and no stemming. + case_sensitive (bool, optional): Enable or disable case sensitive + search. + diacritic_sensitive (bool, optional): Enable or disable case sensitive + diacritic search. + text_score (bool, optional): If `True` returns the text score with + each document. + limit (int, optional): Limit the number of returned documents. + + Returns: + :obj:`list` of :obj:`dict`: a list of assets + """ + + raise OperationError('This query is only supported when running ' + 'BigchainDB with MongoDB as the backend.') diff --git a/bigchaindb/core.py b/bigchaindb/core.py index 1cd21222..42589b36 100644 --- a/bigchaindb/core.py +++ b/bigchaindb/core.py @@ -662,3 +662,14 @@ class Bigchain(object): the database. """ return backend.query.write_assets(self.connection, assets) + + def text_search(self, search, *, limit=0): + assets = backend.query.text_search(self.connection, search, limit=limit) + + # TODO: This is not efficient. There may be a more efficient way to + # query by storing block ids with the assets and using fastquery. + # See https://github.com/bigchaindb/bigchaindb/issues/1496 + for asset in assets: + tx, status = self.get_transaction(asset['id'], True) + if status == self.TX_VALID: + yield asset diff --git a/tests/db/test_bigchain_api.py b/tests/db/test_bigchain_api.py index 5960f171..339c565d 100644 --- a/tests/db/test_bigchain_api.py +++ b/tests/db/test_bigchain_api.py @@ -213,6 +213,89 @@ class TestBigchainApi(object): assert b.get_transaction(tx1.id) is None assert b.get_transaction(tx2.id) == tx2 + @pytest.mark.genesis + def test_text_search(self, b): + from bigchaindb.models import Transaction + from bigchaindb.backend.exceptions import OperationError + from bigchaindb.backend.mongodb.connection import MongoDBConnection + + # define the assets + asset1 = {'msg': 'BigchainDB 1'} + asset2 = {'msg': 'BigchainDB 2'} + asset3 = {'msg': 'BigchainDB 3'} + + # create the transactions + tx1 = Transaction.create([b.me], [([b.me], 1)], + asset=asset1).sign([b.me_private]) + tx2 = Transaction.create([b.me], [([b.me], 1)], + asset=asset2).sign([b.me_private]) + tx3 = Transaction.create([b.me], [([b.me], 1)], + asset=asset3).sign([b.me_private]) + + # create the block + block = b.create_block([tx1, tx2, tx3]) + b.write_block(block) + + # vote valid + vote = b.vote(block.id, b.get_last_voted_block().id, True) + b.write_vote(vote) + + # get the assets through text search + # this query only works with MongoDB + try: + assets = list(b.text_search('bigchaindb')) + except OperationError as exc: + assert not isinstance(b.connection, MongoDBConnection) + return + + assert len(assets) == 3 + + @pytest.mark.genesis + def test_text_search_returns_valid_only(self, monkeypatch, b): + from bigchaindb.models import Transaction + from bigchaindb.backend.exceptions import OperationError + from bigchaindb.backend.mongodb.connection import MongoDBConnection + + asset_valid = {'msg': 'Hello BigchainDB!'} + asset_invalid = {'msg': 'Goodbye BigchainDB!'} + + monkeypatch.setattr('time.time', lambda: 1000000000) + tx1 = Transaction.create([b.me], [([b.me], 1)], + asset=asset_valid) + tx1 = tx1.sign([b.me_private]) + block1 = b.create_block([tx1]) + b.write_block(block1) + + monkeypatch.setattr('time.time', lambda: 1000000020) + tx2 = Transaction.create([b.me], [([b.me], 1)], + asset=asset_invalid) + tx2 = tx2.sign([b.me_private]) + block2 = b.create_block([tx2]) + b.write_block(block2) + + # vote the first block valid + vote = b.vote(block1.id, b.get_last_voted_block().id, True) + b.write_vote(vote) + + # vote the second block invalid + vote = b.vote(block2.id, b.get_last_voted_block().id, False) + b.write_vote(vote) + + # get assets with text search + try: + assets = list(b.text_search('bigchaindb')) + except OperationError: + assert not isinstance(b.connection, MongoDBConnection) + return + + # should only return one asset + assert len(assets) == 1 + # should return the asset created by tx1 + assert assets[0] == { + 'data': {'msg': 'Hello BigchainDB!'}, + 'id': tx1.id + } + @pytest.mark.usefixtures('inputs') def test_write_transaction(self, b, user_pk, user_sk): from bigchaindb import Bigchain From 56379e9bec27e0a9e98ebc564c80f61e8c36c3c7 Mon Sep 17 00:00:00 2001 From: Rodolphe Marques Date: Wed, 24 May 2017 12:11:40 +0200 Subject: [PATCH 17/21] fix pep8 issue --- tests/backend/mongodb/test_queries.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/backend/mongodb/test_queries.py b/tests/backend/mongodb/test_queries.py index a536525b..897a0f06 100644 --- a/tests/backend/mongodb/test_queries.py +++ b/tests/backend/mongodb/test_queries.py @@ -596,4 +596,4 @@ def test_text_search(): assert list(query.text_search(conn, 'coffee', limit=2)) == [ {'id': 1, 'subject': 'coffee', 'author': 'xyz', 'views': 50}, {'id': 2, 'subject': 'Coffee Shopping', 'author': 'efg', 'views': 5}, - ] \ No newline at end of file + ] From cda8259e5704b0d03568d602d7f039c5219bf518 Mon Sep 17 00:00:00 2001 From: Rodolphe Marques Date: Wed, 24 May 2017 12:38:34 +0200 Subject: [PATCH 18/21] Fixed typos --- bigchaindb/backend/query.py | 4 ++-- bigchaindb/core.py | 2 +- bigchaindb/models.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/bigchaindb/backend/query.py b/bigchaindb/backend/query.py index 8245fb3d..9b2197a5 100644 --- a/bigchaindb/backend/query.py +++ b/bigchaindb/backend/query.py @@ -229,7 +229,7 @@ def get_assets(connection, asset_ids): """Get a list of assets from the assets table. Args: - asset_ids (list): a of list of ids for the assets to be retrieved from + asset_ids (list): a list of ids for the assets to be retrieved from the database. Returns: @@ -293,7 +293,7 @@ def get_last_voted_block_id(connection, node_pubkey): node_pubkey (str): base58 encoded public key. Returns: - The last block id the node has voted on. If the node didn't cast + The id of the last block the node has voted on. If the node didn't cast any vote then the genesis block id is returned. """ diff --git a/bigchaindb/core.py b/bigchaindb/core.py index 1cd21222..b2c8d398 100644 --- a/bigchaindb/core.py +++ b/bigchaindb/core.py @@ -648,7 +648,7 @@ class Bigchain(object): asset_ids (:obj:`list` of :obj:`str`): A list of asset_ids to retrieve from the database. - Returs: + Returns: list: The list of assets returned from the database. """ return backend.query.get_assets(self.connection, asset_ids) diff --git a/bigchaindb/models.py b/bigchaindb/models.py index c77c3338..58743939 100644 --- a/bigchaindb/models.py +++ b/bigchaindb/models.py @@ -106,7 +106,7 @@ class Transaction(Transaction): if tx_dict['operation'] in [Transaction.CREATE, Transaction.GENESIS]: # TODO: Maybe replace this call to a call to get_asset_by_id asset = list(bigchain.get_assets([tx_dict['id']]))[0] - asset.pop('id') + del asset['id'] tx_dict.update({'asset': asset}) return cls.from_dict(tx_dict) From ae564974561e4de29d8469252aff69ef073432bd Mon Sep 17 00:00:00 2001 From: Leo Arias Date: Fri, 19 May 2017 15:37:38 -0600 Subject: [PATCH 19/21] set the version of the snap using git tags closes #1436 --- snap/snapcraft.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/snap/snapcraft.yaml b/snap/snapcraft.yaml index aa3a9bca..4fe5618f 100644 --- a/snap/snapcraft.yaml +++ b/snap/snapcraft.yaml @@ -1,5 +1,5 @@ name: bigchaindb -version: master +version: git summary: a scalable blockchain database description: | With high throughput, sub-second latency and powerful functionality to From e9979dda85732eb3134fd0a670c9a92ed3c29e25 Mon Sep 17 00:00:00 2001 From: Rodolphe Marques Date: Wed, 24 May 2017 17:05:43 +0200 Subject: [PATCH 20/21] fixed typos --- bigchaindb/models.py | 13 ++++++------- tests/test_block_model.py | 2 +- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/bigchaindb/models.py b/bigchaindb/models.py index 58743939..8ca79b67 100644 --- a/bigchaindb/models.py +++ b/bigchaindb/models.py @@ -351,7 +351,7 @@ class Block(object): def decouple_assets(self): """ - Extracts the assets from the `CREATE` transactions in the block. + Extracts the assets from the ``CREATE`` transactions in the block. Returns: tuple: (assets, block) with the assets being a list of dicts and @@ -372,9 +372,9 @@ class Block(object): @staticmethod def couple_assets(block_dict, assets): """ - Give a block_dict with not assets (as returned from a database call) - and a list of assets, reconstruct the original block by puting the - assets back into the `CREATE` transactions in the block. + Given a block_dict with no assets (as returned from a database call) + and a list of assets, reconstruct the original block by putting the + assets back into the ``CREATE`` transactions in the block. Args: block_dict (:obj:`dict`): The block dict as returned from a @@ -391,15 +391,14 @@ class Block(object): for transaction in block_dict['block']['transactions']: if transaction['operation'] in [Transaction.CREATE, Transaction.GENESIS]: - transaction.update({'asset': assets.get(transaction['id'], - None)}) + transaction.update({'asset': assets.get(transaction['id'])}) return block_dict @staticmethod def get_asset_ids(block_dict): """ Given a block_dict return all the asset_ids for that block (the txid - of CREATE transactions). Usefull to know which assets to retrieve + of CREATE transactions). Useful to know which assets to retrieve from the database to reconstruct the block. Args: diff --git a/tests/test_block_model.py b/tests/test_block_model.py index 0824c40f..6e14d293 100644 --- a/tests/test_block_model.py +++ b/tests/test_block_model.py @@ -220,7 +220,7 @@ class TestBlockModel(object): block_dict_reconstructed = Block.couple_assets(block_dict, assets_from_block) - # check that the reconstructed block is the as the original block + # check that the reconstructed block is the same as the original block assert block == Block.from_dict(block_dict_reconstructed) def test_get_asset_ids(self, b): From 3819ae5d654968e2c6850e4407abe170ac9034a2 Mon Sep 17 00:00:00 2001 From: Rodolphe Marques Date: Fri, 26 May 2017 10:34:58 +0200 Subject: [PATCH 21/21] Clean up code - Fixed docstrings --- bigchaindb/backend/mongodb/query.py | 4 ++-- bigchaindb/backend/query.py | 6 ++++-- tests/db/test_bigchain_api.py | 5 ++--- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/bigchaindb/backend/mongodb/query.py b/bigchaindb/backend/mongodb/query.py index 9b25c0fc..eaef721d 100644 --- a/bigchaindb/backend/mongodb/query.py +++ b/bigchaindb/backend/mongodb/query.py @@ -371,8 +371,8 @@ def text_search(conn, search, *, language='english', case_sensitive=False, if text_score: return cursor - else: - return (_remove_text_score(asset) for asset in cursor) + + return (_remove_text_score(asset) for asset in cursor) def _remove_text_score(asset): diff --git a/bigchaindb/backend/query.py b/bigchaindb/backend/query.py index 07fd3e4c..74879cef 100644 --- a/bigchaindb/backend/query.py +++ b/bigchaindb/backend/query.py @@ -363,17 +363,19 @@ def text_search(conn, search, *, language='english', case_sensitive=False, """Return all the assets that match the text search. The results are sorted by text score. + For more information about the behavior of text search on MongoDB see + https://docs.mongodb.com/manual/reference/operator/query/text/#behavior Args: search (str): Text search string to query the text index language (str, optional): The language for the search and the rules for - stemmer and tokenizer. If the language is `None` text search uses + stemmer and tokenizer. If the language is ``None`` text search uses simple tokenization and no stemming. case_sensitive (bool, optional): Enable or disable case sensitive search. diacritic_sensitive (bool, optional): Enable or disable case sensitive diacritic search. - text_score (bool, optional): If `True` returns the text score with + text_score (bool, optional): If ``True`` returns the text score with each document. limit (int, optional): Limit the number of returned documents. diff --git a/tests/db/test_bigchain_api.py b/tests/db/test_bigchain_api.py index feb24e9c..05b07bf6 100644 --- a/tests/db/test_bigchain_api.py +++ b/tests/db/test_bigchain_api.py @@ -246,9 +246,8 @@ class TestBigchainApi(object): assets = list(b.text_search('bigchaindb')) except OperationError as exc: assert not isinstance(b.connection, MongoDBConnection) - return - - assert len(assets) == 3 + else: + assert len(assets) == 3 @pytest.mark.genesis def test_text_search_returns_valid_only(self, monkeypatch, b):