diff --git a/.gitattributes b/.gitattributes index cd945c78..d278a72d 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,11 +1,9 @@ -benchmarking-tests export-ignore deploy-cluster-aws export-ignore docs export-ignore ntools export-ignore -speed-tests export-ignore tests export-ignore .gitattributes export-ignore .gitignore export-ignore .travis.yml export-ignore *.md export-ignore -codecov.yml export-ignore \ No newline at end of file +codecov.yml export-ignore diff --git a/.gitignore b/.gitignore index efa00db2..20d71296 100644 --- a/.gitignore +++ b/.gitignore @@ -71,8 +71,6 @@ deploy-cluster-aws/confiles/ deploy-cluster-aws/client_confile deploy-cluster-aws/hostlist.py deploy-cluster-aws/ssh_key.py -benchmarking-tests/hostlist.py -benchmarking-tests/ssh_key.py # Ansible-specific files ntools/one-m/ansible/hosts @@ -80,7 +78,7 @@ ntools/one-m/ansible/ansible.cfg # Just in time documentation docs/server/source/schema -docs/server/source/drivers-clients/samples +docs/server/source/http-samples # Terraform state files # See https://stackoverflow.com/a/41482391 diff --git a/.travis.yml b/.travis.yml index da7ae05f..9fc4e278 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,7 +4,8 @@ cache: pip python: - 3.4 - 3.5 - + - 3.6 + env: - TOXENV=flake8 - TOXENV=docsroot @@ -13,11 +14,17 @@ env: matrix: fast_finish: true exclude: - - python: 3.4 + - python: 3.4 env: TOXENV=flake8 - - python: 3.4 + - python: 3.4 env: TOXENV=docsroot - - python: 3.4 + - python: 3.4 + env: TOXENV=docsserver + - python: 3.5 + env: TOXENV=flake8 + - python: 3.5 + env: TOXENV=docsroot + - python: 3.5 env: TOXENV=docsserver include: - python: 3.4 @@ -30,6 +37,12 @@ matrix: env: BIGCHAINDB_DATABASE_BACKEND=rethinkdb - python: 3.5 env: BIGCHAINDB_DATABASE_BACKEND=mongodb + - python: 3.6 + addons: + rethinkdb: '2.3.5' + env: BIGCHAINDB_DATABASE_BACKEND=rethinkdb + - python: 3.6 + env: BIGCHAINDB_DATABASE_BACKEND=mongodb before_install: sudo .ci/travis-before-install.sh diff --git a/CHANGELOG.md b/CHANGELOG.md index f1f8d6b4..3db903e3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,107 @@ For reference, the possible headings are: * **External Contributors** to list contributors outside of BigchainDB GmbH. * **Notes** +## [0.10.1] - 2017-04-19 +Tag name: v0.10.1 + +## Added +* Documentation for the BigchainDB settings `wsserver.host` and `wsserver.port`. [Pull Request #1408](https://github.com/bigchaindb/bigchaindb/pull/1408) + +## Fixed +* Fixed `Dockerfile`, which was failing to build. It now starts `FROM python:3.6` (instead of `FROM ubuntu:xenial`). [Pull Request #1410](https://github.com/bigchaindb/bigchaindb/pull/1410) +* Fixed the `Makefile` so that `release` depends on `dist`. [Pull Request #1405](https://github.com/bigchaindb/bigchaindb/pull/1405) + +## [0.10.0] - 2017-04-18 +Tag name: v0.10.0 + +### Added +* Improved logging. Added logging to file. Added `--log-level` option to `bigchaindb start` command. Added new logging configuration settings. 
Pull Requests +[#1285](https://github.com/bigchaindb/bigchaindb/pull/1285), +[#1307](https://github.com/bigchaindb/bigchaindb/pull/1307), +[#1324](https://github.com/bigchaindb/bigchaindb/pull/1324), +[#1326](https://github.com/bigchaindb/bigchaindb/pull/1326), +[#1327](https://github.com/bigchaindb/bigchaindb/pull/1327), +[#1330](https://github.com/bigchaindb/bigchaindb/pull/1330), +[#1365](https://github.com/bigchaindb/bigchaindb/pull/1365), +[#1394](https://github.com/bigchaindb/bigchaindb/pull/1394), +[#1396](https://github.com/bigchaindb/bigchaindb/pull/1396), +[#1398](https://github.com/bigchaindb/bigchaindb/pull/1398) and +[#1402](https://github.com/bigchaindb/bigchaindb/pull/1402) +* Events API using WebSocket protocol. Pull Requests +[#1086](https://github.com/bigchaindb/bigchaindb/pull/1086), +[#1347](https://github.com/bigchaindb/bigchaindb/pull/1347), +[#1349](https://github.com/bigchaindb/bigchaindb/pull/1349), +[#1356](https://github.com/bigchaindb/bigchaindb/pull/1356), +[#1368](https://github.com/bigchaindb/bigchaindb/pull/1368), +[#1401](https://github.com/bigchaindb/bigchaindb/pull/1401) and +[#1403](https://github.com/bigchaindb/bigchaindb/pull/1403) +* Initial support for using SSL with MongoDB (work in progress). Pull Requests +[#1299](https://github.com/bigchaindb/bigchaindb/pull/1299) and +[#1348](https://github.com/bigchaindb/bigchaindb/pull/1348) + +### Changed +* The main BigchainDB Dockerfile (and its generated Docker image) now contains only BigchainDB Server. (It used to contain both BigchainDB Server and RethinkDB.) You must now run MongoDB or RethinkDB in a separate Docker container. [Pull Request #1174](https://github.com/bigchaindb/bigchaindb/pull/1174) +* Made separate schemas for CREATE and TRANSFER transactions. [Pull Request #1257](https://github.com/bigchaindb/bigchaindb/pull/1257) +* When signing transactions with threshold conditions, we now sign all subconditions for a public key. [Pull Request #1294](https://github.com/bigchaindb/bigchaindb/pull/1294) +* Many changes to the voting-related code, including how we validate votes and prevent duplicate votes by the same node. Pull Requests [#1215](https://github.com/bigchaindb/bigchaindb/pull/1215) and [#1258](https://github.com/bigchaindb/bigchaindb/pull/1258) + +### Removed +* Removed the `bigchaindb load` command. Pull Requests +[#1261](https://github.com/bigchaindb/bigchaindb/pull/1261), +[#1273](https://github.com/bigchaindb/bigchaindb/pull/1273) and +[#1301](https://github.com/bigchaindb/bigchaindb/pull/1301) +* Removed old `/speed-tests` and `/benchmarking-tests` directories. [Pull Request #1359](https://github.com/bigchaindb/bigchaindb/pull/1359) + +### Fixed +* Fixed the URL of the BigchainDB docs returned by the HTTP API. [Pull Request #1178](https://github.com/bigchaindb/bigchaindb/pull/1178) +* Fixed the MongoDB changefeed: it wasn't reporting update operations. [Pull Request #1193](https://github.com/bigchaindb/bigchaindb/pull/1193) +* Fixed the block-creation process: it wasn't checking if the transaction was previously included in: + * a valid block. [Pull Request #1208](https://github.com/bigchaindb/bigchaindb/pull/1208) + * the block-under-construction. 
Pull Requests [#1237](https://github.com/bigchaindb/bigchaindb/issues/1237) and [#1377](https://github.com/bigchaindb/bigchaindb/issues/1377) + +### External Contributors +In alphabetical order by GitHub username: +* @anryko - [Pull Request #1277](https://github.com/bigchaindb/bigchaindb/pull/1277) +* @anujism - [Pull Request #1366](https://github.com/bigchaindb/bigchaindb/pull/1366) +* @jackric - [Pull Request #1365](https://github.com/bigchaindb/bigchaindb/pull/1365) +* @lavinasachdev3 - [Pull Request #1358](https://github.com/bigchaindb/bigchaindb/pull/1358) +* @morrme - [Pull Request #1340](https://github.com/bigchaindb/bigchaindb/pull/1340) +* @tomconte - [Pull Request #1299](https://github.com/bigchaindb/bigchaindb/pull/1299) +* @tymlez - Pull Requests [#1108](https://github.com/bigchaindb/bigchaindb/pull/1108) & [#1209](https://github.com/bigchaindb/bigchaindb/pull/1209) + +### Notes +* MongoDB is now the recommended database backend (not RethinkDB). +* There are some initial docs about how to deploy a BigchainDB node on Kubernetes. It's work in progress. + + +## [0.9.5] - 2017-03-29 +Tag name: v0.9.5 + +### Fixed +Upgrade `python-rapidjson` to `0.0.11`(fixes #1350 - thanks to @ferOnti for +reporting). + +## [0.9.4] - 2017-03-16 +Tag name: v0.9.4 + +### Fixed +Fixed #1271 (false double spend error). Thanks to @jmduque for reporting the +problem along with a very detailed diagnosis and useful recommendations. + +## [0.9.3] - 2017-03-06 +Tag name: v0.9.3 + +### Fixed +Fixed HTTP API 500 error on `GET /outputs`: issues #1200 and #1231. + +## [0.9.2] - 2017-03-02 +Tag name: v0.9.2 + +### Fixed +Pin `python-rapidjson` library in `setup.py` to prevent `bigchaindb`'s +installation to fail due to +https://github.com/python-rapidjson/python-rapidjson/issues/62. + ## [0.9.1] - 2017-02-06 Tag name: v0.9.1 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 03d02403..840a0895 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -2,6 +2,8 @@ There are many ways you can contribute to the BigchainDB project, some very easy and others more involved. We want to be friendly and welcoming to all potential contributors, so we ask that everyone involved abide by some simple guidelines outlined in our [Code of Conduct](./CODE_OF_CONDUCT.md). +Or, are you interested in contributing full-time? BigchainDB is hiring. See [here](https://github.com/bigchaindb/org/blob/master/engjob.md). + ## Easy Ways to Contribute The BigchainDB community has a Google Group and a Gitter chatroom. Our [Community page](https://www.bigchaindb.com/community) has more information about those. @@ -143,6 +145,13 @@ Once you accept and submit the CLA, we'll email you with further instructions. ( Someone will then merge your branch or suggest changes. If we suggest changes, you won't have to open a new pull request, you can just push new code to the same branch (on `origin`) as you did before creating the pull request. +### Tip: Upgrading All BigchainDB Dependencies + +Over time, your versions of the Python packages used by BigchainDB will get out of date. 
You can upgrade them using: +```text +pip install --upgrade -e .[dev] +``` + ## Quick Links * [BigchainDB Community links](https://www.bigchaindb.com/community) diff --git a/Dockerfile b/Dockerfile index bcfa8609..807761fe 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,41 +1,17 @@ -FROM ubuntu:xenial - -# From http://stackoverflow.com/a/38553499 - -RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y locales - -RUN sed -i -e 's/# en_US.UTF-8 UTF-8/en_US.UTF-8 UTF-8/' /etc/locale.gen && \ - echo 'LANG="en_US.UTF-8"'>/etc/default/locale && \ - dpkg-reconfigure --frontend=noninteractive locales && \ - update-locale LANG=en_US.UTF-8 - -ENV LANG en_US.UTF-8 - -# The `apt-get update` command executed with the install instructions should -# not use a locally cached storage layer. Force update the cache again. -# https://docs.docker.com/engine/userguide/eng-image/dockerfile_best-practices/#run -RUN apt-get update && apt-get -y install python3 python3-pip libffi-dev \ - && pip3 install --upgrade pip \ - && pip3 install --upgrade setuptools - +FROM python:3.6 +LABEL maintainer "dev@bigchaindb.com" RUN mkdir -p /usr/src/app - COPY . /usr/src/app/ - WORKDIR /usr/src/app - -RUN pip3 install --no-cache-dir -e . - +RUN apt-get -qq update \ + && apt-get -y upgrade \ + && pip install --no-cache-dir . \ + && apt-get autoremove \ + && apt-get clean VOLUME ["/data"] - WORKDIR /data - ENV BIGCHAINDB_CONFIG_PATH /data/.bigchaindb ENV BIGCHAINDB_SERVER_BIND 0.0.0.0:9984 -# BigchainDB Server doesn't need BIGCHAINDB_API_ENDPOINT any more -# but maybe our Docker or Docker Compose stuff does? -# ENV BIGCHAINDB_API_ENDPOINT http://bigchaindb:9984/api/v1 - +ENV BIGCHAINDB_WSSERVER_HOST 0.0.0.0 ENTRYPOINT ["bigchaindb"] - CMD ["start"] diff --git a/Dockerfile-dev b/Dockerfile-dev index 2ae4e2ba..17c8b073 100644 --- a/Dockerfile-dev +++ b/Dockerfile-dev @@ -1,13 +1,21 @@ -FROM python:3.5 +FROM python:3.6 +LABEL maintainer "dev@bigchaindb.com" -RUN apt-get update && apt-get install -y python3.4 vim +RUN apt-get update \ + && apt-get install -y vim \ + && pip install pynacl \ + && apt-get autoremove \ + && apt-get clean + +VOLUME ["/data"] +WORKDIR /data + +ENV BIGCHAINDB_CONFIG_PATH /data/.bigchaindb +ENV BIGCHAINDB_SERVER_BIND 0.0.0.0:9984 +ENV BIGCHAINDB_WSSERVER_HOST 0.0.0.0 RUN mkdir -p /usr/src/app -WORKDIR /usr/src/app - -RUN pip install --upgrade pip - COPY . /usr/src/app/ - +WORKDIR /usr/src/app RUN pip install --no-cache-dir -e .[dev] -RUN bigchaindb -y configure rethinkdb +RUN bigchaindb -y configure mongodb diff --git a/HOW_TO_HANDLE_PULL_REQUESTS.md b/HOW_TO_HANDLE_PULL_REQUESTS.md index 4dfbec15..6114c7ac 100644 --- a/HOW_TO_HANDLE_PULL_REQUESTS.md +++ b/HOW_TO_HANDLE_PULL_REQUESTS.md @@ -51,3 +51,15 @@ END BLOCK (END OF EMAIL) The next step is to wait for them to copy that comment into the comments of the indicated pull request. Once they do so, it's safe to merge the pull request. + +## How to Handle CLA Agreement Emails with No Associated Pull Request + +Reply with an email like this: + +Hi [First Name], + +Today I got an email (copied below) to tell me that you agreed to the BigchainDB Contributor License Agreement. Did you intend to do that? + +If no, then you can ignore this email. + +If yes, then there's another step to connect your email address with your GitHub account. To do that, you must first create a pull request in one of the BigchainDB repositories on GitHub. Once you've done that, please reply to this email with a link to the pull request. 
Then I'll send you a special block of text to paste into the comments on that pull request. diff --git a/Makefile b/Makefile index 7fc9c1c0..a3012a03 100644 --- a/Makefile +++ b/Makefile @@ -51,18 +51,14 @@ lint: ## check style with flake8 flake8 bigchaindb tests test: ## run tests quickly with the default Python - py.test - + pytest -v -n auto test-all: ## run tests on every Python version with tox tox coverage: ## check code coverage quickly with the default Python - coverage run --source bigchaindb py.test - - coverage report -m - coverage html - $(BROWSER) htmlcov/index.html + pytest -v -n auto --cov=bigchaindb --cov-report term --cov-report html + $(BROWSER) htmlcov/index.html docs: ## generate Sphinx HTML documentation, including API docs $(MAKE) -C docs/root clean @@ -74,7 +70,7 @@ docs: ## generate Sphinx HTML documentation, including API docs servedocs: docs ## compile the docs watching for changes watchmedo shell-command -p '*.rst' -c '$(MAKE) -C docs html' -R -D . -release: clean ## package and upload a release +release: dist ## package and upload a release twine upload dist/* dist: clean ## builds source (and not for now, wheel package) diff --git a/PYTHON_STYLE_GUIDE.md b/PYTHON_STYLE_GUIDE.md index befe4eeb..5ca44e83 100644 --- a/PYTHON_STYLE_GUIDE.md +++ b/PYTHON_STYLE_GUIDE.md @@ -82,6 +82,6 @@ flake8 --max-line-length 119 bigchaindb/ ## Writing and Running (Python) Tests -The content of this section was moved to [`bigchiandb/tests/README.md`](./tests/README.md). +The content of this section was moved to [`bigchaindb/tests/README.md`](./tests/README.md). -Note: We automatically run all tests on all pull requests (using Travis CI), so you should definitely run all tests locally before you submit a pull request. See the above-linked README file for instructions. \ No newline at end of file +Note: We automatically run all tests on all pull requests (using Travis CI), so you should definitely run all tests locally before you submit a pull request. See the above-linked README file for instructions. diff --git a/Release_Process.md b/Release_Process.md index a4e3d427..e4a988a1 100644 --- a/Release_Process.md +++ b/Release_Process.md @@ -14,10 +14,8 @@ A minor release is preceeded by a feature freeze and created from the 'master' b 1. In `bigchaindb/version.py`, update `__version__` and `__short_version__`, e.g. to `0.9` and `0.9.0` (with no `.dev` on the end) 1. Commit that change, and push the new branch to GitHub 1. Follow steps outlined in [Common Steps](#common-steps) -1. In 'master' branch, Edit `bigchaindb/version.py`, increment the minor version to the next planned release, e.g. `0.10.0.dev' -This is so people reading the latest docs will know that they're for the latest (master branch) -version of BigchainDB Server, not the docs at the time of the most recent release (which are also -available). +1. In 'master' branch, Edit `bigchaindb/version.py`, increment the minor version to the next planned release, e.g. `0.10.0.dev`. This is so people reading the latest docs will know that they're for the latest (master branch) version of BigchainDB Server, not the docs at the time of the most recent release (which are also available). +1. Go to [Docker Hub](https://hub.docker.com/), sign in, go to Settings - Build Settings, and under the build with Docker Tag Name equal to `latest`, change the Name to the number of the new release, e.g. `0.9` Congratulations, you have released BigchainDB! 
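For illustration, the `bigchaindb/version.py` edit described in the minor-release steps above amounts to the following (a sketch using the example numbers from those steps; the exact `__short_version__` form used on 'master' is assumed):

```python
# bigchaindb/version.py -- sketch of the release-time edit.
# At release time, drop the '.dev' suffix:
__short_version__ = '0.9'
__version__ = '0.9.0'

# Afterwards, on 'master', increment the minor version to the next
# planned release and restore the '.dev' suffix (assumed form):
# __short_version__ = '0.10.dev'
# __version__ = '0.10.0.dev'
```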
@@ -29,6 +27,7 @@ A patch release is similar to a minor release, but piggybacks on an existing min 1. Apply the changes you want, e.g. using `git cherry-pick`. 1. Update the `CHANGELOG.md` file 1. Increment the patch version in `bigchaindb/version.py`, e.g. "0.9.1" +1. Commit that change, and push the updated branch to GitHub 1. Follow steps outlined in [Common Steps](#common-steps) 1. Cherry-pick the `CHANGELOG.md` update commit (made above) to the `master` branch @@ -47,10 +46,16 @@ These steps are common between minor and patch releases: 1. Make sure your local Git is in the same state as the release: e.g. `git fetch ` and `git checkout v0.9.1` 1. Make sure you have a `~/.pypirc` file containing credentials for PyPI 1. Do a `make release` to build and publish the new `bigchaindb` package on PyPI -1. Login to readthedocs.org as a maintainer of the BigchainDB Server docs. - Go to Admin --> Versions and under **Choose Active Versions**, make sure that the new version's tag is - "Active" and "Public", and make sure the new version's branch - (without the 'v' in front) is _not_ active -1. Also in readthedocs.org, go to Admin --> Advanced Settings - and make sure that "Default branch:" (i.e. what "latest" points to) - is set to the new release's tag, e.g. `v0.9.1`. (Don't miss the 'v' in front.) +1. [Login to readthedocs.org](https://readthedocs.org/accounts/login/) + as a maintainer of the BigchainDB Server docs, and: + - Go to Admin --> Advanced Settings + and make sure that "Default branch:" (i.e. what "latest" points to) + is set to the new release's tag, e.g. `v0.9.1`. + (Don't miss the 'v' in front.) + - Go to Admin --> Versions + and under **Choose Active Versions**, do these things: + 1. Make sure that the new version's tag is "Active" and "Public" + 2. Make sure the new version's branch + (without the 'v' in front) is _not_ active. + 3. Make sure the **stable** branch is _not_ active. + 4. Scroll to the bottom of the page and click the Submit button. diff --git a/benchmarking-tests/README.md b/benchmarking-tests/README.md deleted file mode 100644 index 3ae00969..00000000 --- a/benchmarking-tests/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# Benchmarking tests - -This folder contains util files and test case folders to benchmark the performance of a BigchainDB federation. \ No newline at end of file diff --git a/benchmarking-tests/benchmark_utils.py b/benchmarking-tests/benchmark_utils.py deleted file mode 100644 index d7418a36..00000000 --- a/benchmarking-tests/benchmark_utils.py +++ /dev/null @@ -1,154 +0,0 @@ -import multiprocessing as mp -import uuid -import argparse -import csv -import time -import logging -import rethinkdb as r - -from bigchaindb.common.transaction import Transaction - -from bigchaindb import Bigchain -from bigchaindb.utils import ProcessGroup -from bigchaindb.commands import utils - - -SIZE_OF_FILLER = {'minimal': 0, - 'small': 10**3, - 'medium': 10**4, - 'large': 10**5} - - -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - - -def create_write_transaction(tx_left, payload_filler): - b = Bigchain() - payload_dict = {} - if payload_filler: - payload_dict['filler'] = payload_filler - while tx_left > 0: - # Include a random uuid string in the payload - # to prevent duplicate transactions - # (i.e. 
transactions with the same hash) - payload_dict['msg'] = str(uuid.uuid4()) - tx = Transaction.create([b.me], [b.me], payload=payload_dict) - tx = tx.sign([b.me_private]) - b.write_transaction(tx) - tx_left -= 1 - - -def run_add_backlog(args): - tx_left = args.num_transactions // mp.cpu_count() - payload_filler = 'x' * SIZE_OF_FILLER[args.payload_size] - workers = ProcessGroup(target=create_write_transaction, - args=(tx_left, payload_filler)) - workers.start() - - -def run_gather_metrics(args): - # setup a rethinkdb connection - conn = r.connect(args.bigchaindb_host, 28015, 'bigchain') - - # setup csv writer - csv_file = open(args.csvfile, 'w') - csv_writer = csv.writer(csv_file) - - # query for the number of transactions on the backlog - num_transactions = r.table('backlog').count().run(conn) - num_transactions_received = 0 - initial_time = None - logger.info('Starting gathering metrics.') - logger.info('{} transasctions in the backlog'.format(num_transactions)) - logger.info('This process should exit automatically. ' - 'If this does not happen you can exit at any time using Ctrl-C ' - 'saving all the metrics gathered up to this point.') - - logger.info('\t{:<20} {:<20} {:<20} {:<20}'.format( - 'timestamp', - 'tx in block', - 'tx/s', - '% complete' - )) - - # listen to the changefeed - try: - for change in r.table('bigchain').changes().run(conn): - # check only for new blocks - if change['old_val'] is None: - block_num_transactions = len( - change['new_val']['block']['transactions'] - ) - time_now = time.time() - csv_writer.writerow( - [str(time_now), str(block_num_transactions)] - ) - - # log statistics - if initial_time is None: - initial_time = time_now - - num_transactions_received += block_num_transactions - elapsed_time = time_now - initial_time - percent_complete = round( - (num_transactions_received / num_transactions) * 100 - ) - - if elapsed_time != 0: - transactions_per_second = round( - num_transactions_received / elapsed_time - ) - else: - transactions_per_second = float('nan') - - logger.info('\t{:<20} {:<20} {:<20} {:<20}'.format( - time_now, - block_num_transactions, - transactions_per_second, - percent_complete - )) - - if (num_transactions - num_transactions_received) == 0: - break - except KeyboardInterrupt: - logger.info('Interrupted. 
Exiting early...') - finally: - # close files - csv_file.close() - - -def main(): - parser = argparse.ArgumentParser(description='BigchainDB benchmarking utils') - subparsers = parser.add_subparsers(title='Commands', dest='command') - - # add transactions to backlog - backlog_parser = subparsers.add_parser('add-backlog', - help='Add transactions to the backlog') - backlog_parser.add_argument('num_transactions', - metavar='num_transactions', - type=int, default=0, - help='Number of transactions to add to the backlog') - backlog_parser.add_argument('-s', '--payload-size', - choices=SIZE_OF_FILLER.keys(), - default='minimal', - help='Payload size') - - # metrics - metrics_parser = subparsers.add_parser('gather-metrics', - help='Gather metrics to a csv file') - - metrics_parser.add_argument('-b', '--bigchaindb-host', - required=True, - help=('Bigchaindb node hostname to connect ' - 'to gather cluster metrics')) - - metrics_parser.add_argument('-c', '--csvfile', - required=True, - help='Filename to save the metrics') - - utils.start(parser, globals()) - - -if __name__ == '__main__': - main() diff --git a/benchmarking-tests/fabfile.py b/benchmarking-tests/fabfile.py deleted file mode 100644 index 0dd4e964..00000000 --- a/benchmarking-tests/fabfile.py +++ /dev/null @@ -1,46 +0,0 @@ -from __future__ import with_statement, unicode_literals - -from fabric.api import sudo, env, hosts -from fabric.api import task, parallel -from fabric.contrib.files import sed -from fabric.operations import run, put -from fabric.context_managers import settings - -from hostlist import public_dns_names -from ssh_key import ssh_key_path - -# Ignore known_hosts -# http://docs.fabfile.org/en/1.10/usage/env.html#disable-known-hosts -env.disable_known_hosts = True - -# What remote servers should Fabric connect to? With what usernames? -env.user = 'ubuntu' -env.hosts = public_dns_names - -# SSH key files to try when connecting: -# http://docs.fabfile.org/en/1.10/usage/env.html#key-filename -env.key_filename = ssh_key_path - - -@task -@parallel -def put_benchmark_utils(): - put('benchmark_utils.py') - - -@task -@parallel -def prepare_backlog(num_transactions=10000): - run('python3 benchmark_utils.py add-backlog {}'.format(num_transactions)) - - -@task -@parallel -def start_bigchaindb(): - run('screen -d -m bigchaindb start &', pty=False) - - -@task -@parallel -def kill_bigchaindb(): - run('killall bigchaindb') diff --git a/benchmarking-tests/test1/README.md b/benchmarking-tests/test1/README.md deleted file mode 100644 index 38a4569b..00000000 --- a/benchmarking-tests/test1/README.md +++ /dev/null @@ -1,20 +0,0 @@ -# Transactions per second - -Measure how many blocks per second are created on the _bigchain_ with a pre filled backlog. - -1. Deploy an aws cluster https://docs.bigchaindb.com/projects/server/en/latest/clusters-feds/aws-testing-cluster.html -2. Make a symbolic link to hostlist.py: `ln -s ../deploy-cluster-aws/hostlist.py .` -3. Make a symbolic link to bigchaindb.pem: -```bash -mkdir pem -cd pem -ln -s ../deploy-cluster-aws/pem/bigchaindb.pem . -``` - -Then: - -```bash -fab put_benchmark_utils -fab prepare_backlog: # wait for process to finish -fab start_bigchaindb -``` diff --git a/bigchaindb/README.md b/bigchaindb/README.md index dbb59a1e..cd177c85 100644 --- a/bigchaindb/README.md +++ b/bigchaindb/README.md @@ -12,7 +12,7 @@ The `Bigchain` class is defined here. Most operations outlined in the [whitepap ### [`models.py`](./models.py) -`Block`, `Transaction`, and `Asset` classes are defined here. 
The classes mirror the block and transaction structure from the [documentation](https://docs.bigchaindb.com/projects/server/en/latest/topic-guides/models.html), but also include methods for validation and signing. +`Block`, `Transaction`, and `Asset` classes are defined here. The classes mirror the block and transaction structure from the [documentation](https://docs.bigchaindb.com/projects/server/en/latest/data-models/index.html), but also include methods for validation and signing. ### [`consensus.py`](./consensus.py) diff --git a/bigchaindb/__init__.py b/bigchaindb/__init__.py index 10e9e6ce..9c981385 100644 --- a/bigchaindb/__init__.py +++ b/bigchaindb/__init__.py @@ -1,25 +1,54 @@ import copy +import logging import os +from bigchaindb.log.configs import SUBSCRIBER_LOGGING_CONFIG as log_config + # from functools import reduce # PORT_NUMBER = reduce(lambda x, y: x * y, map(ord, 'BigchainDB')) % 2**16 # basically, the port number is 9984 -_database_rethinkdb = { - 'backend': os.environ.get('BIGCHAINDB_DATABASE_BACKEND', 'rethinkdb'), + +_base_database_rethinkdb = { 'host': os.environ.get('BIGCHAINDB_DATABASE_HOST', 'localhost'), 'port': int(os.environ.get('BIGCHAINDB_DATABASE_PORT', 28015)), 'name': os.environ.get('BIGCHAINDB_DATABASE_NAME', 'bigchain'), } -_database_mongodb = { - 'backend': os.environ.get('BIGCHAINDB_DATABASE_BACKEND', 'mongodb'), +# The following variable is used by `bigchaindb configure` to +# prompt the user for database values. We cannot rely on +# _base_database_rethinkdb.keys() or _base_database_mongodb.keys() +# because dicts are unordered. I tried to configure + +_database_keys_map = { + 'mongodb': ('host', 'port', 'name', 'replicaset'), + 'rethinkdb': ('host', 'port', 'name') +} + +_base_database_mongodb = { 'host': os.environ.get('BIGCHAINDB_DATABASE_HOST', 'localhost'), 'port': int(os.environ.get('BIGCHAINDB_DATABASE_PORT', 27017)), 'name': os.environ.get('BIGCHAINDB_DATABASE_NAME', 'bigchain'), 'replicaset': os.environ.get('BIGCHAINDB_DATABASE_REPLICASET', 'bigchain-rs'), + 'ssl': bool(os.environ.get('BIGCHAINDB_DATABASE_SSL', False)), + 'login': os.environ.get('BIGCHAINDB_DATABASE_LOGIN'), + 'password': os.environ.get('BIGCHAINDB_DATABASE_PASSWORD') } +_database_rethinkdb = { + 'backend': os.environ.get('BIGCHAINDB_DATABASE_BACKEND', 'rethinkdb'), + 'connection_timeout': 5000, + 'max_tries': 3, +} +_database_rethinkdb.update(_base_database_rethinkdb) + +_database_mongodb = { + 'backend': os.environ.get('BIGCHAINDB_DATABASE_BACKEND', 'mongodb'), + 'connection_timeout': 5000, + 'max_tries': 3, +} +_database_mongodb.update(_base_database_mongodb) + _database_map = { 'mongodb': _database_mongodb, 'rethinkdb': _database_rethinkdb @@ -30,9 +59,15 @@ config = { # Note: this section supports all the Gunicorn settings: # - http://docs.gunicorn.org/en/stable/settings.html 'bind': os.environ.get('BIGCHAINDB_SERVER_BIND') or 'localhost:9984', + 'loglevel': logging.getLevelName( + log_config['handlers']['console']['level']).lower(), 'workers': None, # if none, the value will be cpu_count * 2 + 1 'threads': None, # if none, the value will be cpu_count * 2 + 1 }, + 'wsserver': { + 'host': os.environ.get('BIGCHAINDB_WSSERVER_HOST') or 'localhost', + 'port': int(os.environ.get('BIGCHAINDB_WSSERVER_PORT', 9985)), + }, 'database': _database_map[ os.environ.get('BIGCHAINDB_DATABASE_BACKEND', 'rethinkdb') ], @@ -41,7 +76,20 @@ config = { 'private': None, }, 'keyring': [], - 'backlog_reassign_delay': 120 + 'backlog_reassign_delay': 120, + 'log': { + 'file': 
log_config['handlers']['file']['filename'], + 'error_file': log_config['handlers']['errors']['filename'], + 'level_console': logging.getLevelName( + log_config['handlers']['console']['level']).lower(), + 'level_logfile': logging.getLevelName( + log_config['handlers']['file']['level']).lower(), + 'datefmt_console': log_config['formatters']['console']['datefmt'], + 'datefmt_logfile': log_config['formatters']['file']['datefmt'], + 'fmt_console': log_config['formatters']['console']['format'], + 'fmt_logfile': log_config['formatters']['file']['format'], + 'granular_levels': {}, + }, } # We need to maintain a backup copy of the original config dict in case diff --git a/bigchaindb/backend/connection.py b/bigchaindb/backend/connection.py index df21321d..b717703b 100644 --- a/bigchaindb/backend/connection.py +++ b/bigchaindb/backend/connection.py @@ -1,8 +1,10 @@ +from itertools import repeat from importlib import import_module import logging import bigchaindb from bigchaindb.common.exceptions import ConfigurationError +from bigchaindb.backend.exceptions import ConnectionError BACKENDS = { @@ -13,7 +15,8 @@ BACKENDS = { logger = logging.getLogger(__name__) -def connect(backend=None, host=None, port=None, name=None, replicaset=None): +def connect(backend=None, host=None, port=None, name=None, max_tries=None, + connection_timeout=None, replicaset=None, ssl=None, login=None, password=None): """Create a new connection to the database backend. All arguments default to the current configuration's values if not @@ -47,6 +50,9 @@ def connect(backend=None, host=None, port=None, name=None, replicaset=None): # to handle these these additional args. In case of RethinkDBConnection # it just does not do anything with it. replicaset = replicaset or bigchaindb.config['database'].get('replicaset') + ssl = ssl if ssl is not None else bigchaindb.config['database'].get('ssl', False) + login = login or bigchaindb.config['database'].get('login') + password = password or bigchaindb.config['database'].get('password') try: module_name, _, class_name = BACKENDS[backend].rpartition('.') @@ -58,7 +64,9 @@ def connect(backend=None, host=None, port=None, name=None, replicaset=None): raise ConfigurationError('Error loading backend `{}`'.format(backend)) from exc logger.debug('Connection: {}'.format(Class)) - return Class(host, port, dbname, replicaset=replicaset) + return Class(host=host, port=port, dbname=dbname, + max_tries=max_tries, connection_timeout=connection_timeout, + replicaset=replicaset, ssl=ssl, login=login, password=password) class Connection: @@ -68,17 +76,41 @@ class Connection: from and implements this class. """ - def __init__(self, host=None, port=None, dbname=None, *args, **kwargs): + def __init__(self, host=None, port=None, dbname=None, + connection_timeout=None, max_tries=None, + **kwargs): """Create a new :class:`~.Connection` instance. Args: host (str): the host to connect to. port (int): the port to connect to. dbname (str): the name of the database to use. + connection_timeout (int, optional): the milliseconds to wait + until timing out the database connection attempt. + Defaults to 5000ms. + max_tries (int, optional): how many tries before giving up, + if 0 then try forever. Defaults to 3. 
**kwargs: arbitrary keyword arguments provided by the configuration's ``database`` settings """ + dbconf = bigchaindb.config['database'] + + self.host = host or dbconf['host'] + self.port = port or dbconf['port'] + self.dbname = dbname or dbconf['name'] + self.connection_timeout = connection_timeout if connection_timeout is not None\ + else dbconf['connection_timeout'] + self.max_tries = max_tries if max_tries is not None else dbconf['max_tries'] + self.max_tries_counter = range(self.max_tries) if self.max_tries != 0 else repeat(0) + self._conn = None + + @property + def conn(self): + if self._conn is None: + self.connect() + return self._conn + def run(self, query): """Run a query. @@ -94,3 +126,26 @@ class Connection: """ raise NotImplementedError() + + def connect(self): + """Try to connect to the database. + + Raises: + :exc:`~ConnectionError`: If the connection to the database + fails. + """ + + attempt = 0 + for i in self.max_tries_counter: + attempt += 1 + try: + self._conn = self._connect() + except ConnectionError as exc: + logger.warning('Attempt %s/%s. Connection to %s:%s failed after %sms.', + attempt, self.max_tries if self.max_tries != 0 else '∞', + self.host, self.port, self.connection_timeout) + if attempt == self.max_tries: + logger.critical('Cannot connect to the Database. Giving up.') + raise ConnectionError() from exc + else: + break diff --git a/bigchaindb/backend/exceptions.py b/bigchaindb/backend/exceptions.py index 3b712b08..017e19e4 100644 --- a/bigchaindb/backend/exceptions.py +++ b/bigchaindb/backend/exceptions.py @@ -15,7 +15,3 @@ class OperationError(BackendError): class DuplicateKeyError(OperationError): """Exception raised when an insert fails because the key is not unique""" - - -class BigchainDBCritical(Exception): - """Unhandleable error that requires attention""" diff --git a/bigchaindb/backend/mongodb/connection.py b/bigchaindb/backend/mongodb/connection.py index d01d5861..5c54470a 100644 --- a/bigchaindb/backend/mongodb/connection.py +++ b/bigchaindb/backend/mongodb/connection.py @@ -1,6 +1,5 @@ import time import logging -from itertools import repeat import pymongo @@ -15,46 +14,23 @@ from bigchaindb.backend.connection import Connection logger = logging.getLogger(__name__) -# TODO: waiting for #1082 to be merged -# to move this constants in the configuration. - -CONNECTION_TIMEOUT = 4000 # in milliseconds -MAX_RETRIES = 3 # number of tries before giving up, if 0 then try forever - - class MongoDBConnection(Connection): - def __init__(self, host=None, port=None, dbname=None, - connection_timeout=None, max_tries=None, - replicaset=None): + def __init__(self, replicaset=None, ssl=None, login=None, password=None, **kwargs): """Create a new Connection instance. Args: - host (str, optional): the host to connect to. - port (int, optional): the port to connect to. - dbname (str, optional): the database to use. - connection_timeout (int, optional): the milliseconds to wait - until timing out the database connection attempt. - max_tries (int, optional): how many tries before giving up, - if 0 then try forever. replicaset (str, optional): the name of the replica set to connect to. 
+ **kwargs: arbitrary keyword arguments provided by the + configuration's ``database`` settings """ - self.host = host or bigchaindb.config['database']['host'] - self.port = port or bigchaindb.config['database']['port'] + super().__init__(**kwargs) self.replicaset = replicaset or bigchaindb.config['database']['replicaset'] - self.dbname = dbname or bigchaindb.config['database']['name'] - self.connection_timeout = connection_timeout if connection_timeout is not None else CONNECTION_TIMEOUT - self.max_tries = max_tries if max_tries is not None else MAX_RETRIES - self.max_tries_counter = range(self.max_tries) if self.max_tries != 0 else repeat(0) - self.connection = None - - @property - def conn(self): - if self.connection is None: - self._connect() - return self.connection + self.ssl = ssl if ssl is not None else bigchaindb.config['database'].get('ssl', False) + self.login = login or bigchaindb.config['database'].get('login') + self.password = password or bigchaindb.config['database'].get('password') @property def db(self): @@ -94,37 +70,33 @@ class MongoDBConnection(Connection): fails. """ - attempt = 0 - for i in self.max_tries_counter: - attempt += 1 + try: + # we should only return a connection if the replica set is + # initialized. initialize_replica_set will check if the + # replica set is initialized else it will initialize it. + initialize_replica_set(self.host, self.port, self.connection_timeout, + self.dbname, self.ssl, self.login, self.password) - try: - # we should only return a connection if the replica set is - # initialized. initialize_replica_set will check if the - # replica set is initialized else it will initialize it. - initialize_replica_set(self.host, self.port, self.connection_timeout) + # FYI: this might raise a `ServerSelectionTimeoutError`, + # that is a subclass of `ConnectionFailure`. + client = pymongo.MongoClient(self.host, + self.port, + replicaset=self.replicaset, + serverselectiontimeoutms=self.connection_timeout, + ssl=self.ssl) - # FYI: this might raise a `ServerSelectionTimeoutError`, - # that is a subclass of `ConnectionFailure`. - self.connection = pymongo.MongoClient(self.host, - self.port, - replicaset=self.replicaset, - serverselectiontimeoutms=self.connection_timeout) + if self.login is not None and self.password is not None: + client[self.dbname].authenticate(self.login, self.password) - # `initialize_replica_set` might raise `ConnectionFailure` or `OperationFailure`. - except (pymongo.errors.ConnectionFailure, - pymongo.errors.OperationFailure) as exc: - logger.warning('Attempt %s/%s. Connection to %s:%s failed after %sms.', - attempt, self.max_tries if self.max_tries != 0 else '∞', - self.host, self.port, self.connection_timeout) - if attempt == self.max_tries: - logger.critical('Cannot connect to the Database. Giving up.') - raise ConnectionError() from exc - else: - break + return client + + # `initialize_replica_set` might raise `ConnectionFailure` or `OperationFailure`. + except (pymongo.errors.ConnectionFailure, + pymongo.errors.OperationFailure) as exc: + raise ConnectionError() from exc -def initialize_replica_set(host, port, connection_timeout): +def initialize_replica_set(host, port, connection_timeout, dbname, ssl, login, password): """Initialize a replica set. 
If already initialized skip.""" # Setup a MongoDB connection @@ -133,7 +105,12 @@ def initialize_replica_set(host, port, connection_timeout): # you try to connect to a replica set that is not yet initialized conn = pymongo.MongoClient(host=host, port=port, - serverselectiontimeoutms=connection_timeout) + serverselectiontimeoutms=connection_timeout, + ssl=ssl) + + if login is not None and password is not None: + conn[dbname].authenticate(login, password) + _check_replica_set(conn) host = '{}:{}'.format(bigchaindb.config['database']['host'], bigchaindb.config['database']['port']) @@ -168,7 +145,7 @@ def _check_replica_set(conn): options = conn.admin.command('getCmdLineOpts') try: repl_opts = options['parsed']['replication'] - repl_set_name = repl_opts.get('replSetName', None) or repl_opts['replSet'] + repl_set_name = repl_opts.get('replSetName', repl_opts.get('replSet')) except KeyError: raise ConfigurationError('mongod was not started with' ' the replSet option.') diff --git a/bigchaindb/backend/mongodb/query.py b/bigchaindb/backend/mongodb/query.py index 1988db04..74b9c35a 100644 --- a/bigchaindb/backend/mongodb/query.py +++ b/bigchaindb/backend/mongodb/query.py @@ -153,14 +153,22 @@ def get_spent(conn, transaction_id, output): cursor = conn.run( conn.collection('bigchain').aggregate([ {'$match': { - 'block.transactions.inputs.fulfills.txid': transaction_id, - 'block.transactions.inputs.fulfills.output': output + 'block.transactions.inputs': { + '$elemMatch': { + 'fulfills.txid': transaction_id, + 'fulfills.output': output, + }, + }, }}, {'$unwind': '$block.transactions'}, {'$match': { - 'block.transactions.inputs.fulfills.txid': transaction_id, - 'block.transactions.inputs.fulfills.output': output - }} + 'block.transactions.inputs': { + '$elemMatch': { + 'fulfills.txid': transaction_id, + 'fulfills.output': output, + }, + }, + }}, ])) # we need to access some nested fields before returning so lets use a # generator to avoid having to read all records on the cursor at this point diff --git a/bigchaindb/backend/mongodb/schema.py b/bigchaindb/backend/mongodb/schema.py index 4c5189ac..ad89f9bc 100644 --- a/bigchaindb/backend/mongodb/schema.py +++ b/bigchaindb/backend/mongodb/schema.py @@ -100,4 +100,5 @@ def create_votes_secondary_index(conn, dbname): ASCENDING), ('node_pubkey', ASCENDING)], - name='block_and_voter') + name='block_and_voter', + unique=True) diff --git a/bigchaindb/backend/rethinkdb/admin.py b/bigchaindb/backend/rethinkdb/admin.py index 23b55048..863ffb31 100644 --- a/bigchaindb/backend/rethinkdb/admin.py +++ b/bigchaindb/backend/rethinkdb/admin.py @@ -96,7 +96,7 @@ def reconfigure(connection, *, table, shards, replicas, try: return connection.run(r.table(table).reconfigure(**params)) except (r.ReqlOpFailedError, r.ReqlQueryLogicError) as e: - raise OperationError from e + raise OperationError('Failed to reconfigure tables.') from e @register_admin(RethinkDBConnection) diff --git a/bigchaindb/backend/rethinkdb/changefeed.py b/bigchaindb/backend/rethinkdb/changefeed.py index e762d905..390ada9a 100644 --- a/bigchaindb/backend/rethinkdb/changefeed.py +++ b/bigchaindb/backend/rethinkdb/changefeed.py @@ -3,6 +3,7 @@ import logging import rethinkdb as r from bigchaindb import backend +from bigchaindb.backend.exceptions import BackendError from bigchaindb.backend.changefeed import ChangeFeed from bigchaindb.backend.utils import module_dispatch_registrar from bigchaindb.backend.rethinkdb.connection import RethinkDBConnection @@ -23,8 +24,8 @@ class RethinkDBChangeFeed(ChangeFeed): 
try: self.run_changefeed() break - except (r.ReqlDriverError, r.ReqlOpFailedError) as exc: - logger.exception(exc) + except (BackendError, r.ReqlDriverError) as exc: + logger.exception('Error connecting to the database, retrying') time.sleep(1) def run_changefeed(self): diff --git a/bigchaindb/backend/rethinkdb/connection.py b/bigchaindb/backend/rethinkdb/connection.py index 988573f6..e917e326 100644 --- a/bigchaindb/backend/rethinkdb/connection.py +++ b/bigchaindb/backend/rethinkdb/connection.py @@ -1,11 +1,7 @@ -import time -import logging - import rethinkdb as r from bigchaindb.backend.connection import Connection - -logger = logging.getLogger(__name__) +from bigchaindb.backend.exceptions import ConnectionError, OperationError class RethinkDBConnection(Connection): @@ -17,23 +13,6 @@ class RethinkDBConnection(Connection): more times to run the query or open a connection. """ - def __init__(self, host, port, dbname, max_tries=3, **kwargs): - """Create a new :class:`~.RethinkDBConnection` instance. - - See :meth:`.Connection.__init__` for - :attr:`host`, :attr:`port`, and :attr:`dbname`. - - Args: - max_tries (int, optional): how many tries before giving up. - Defaults to 3. - """ - - self.host = host - self.port = port - self.dbname = dbname - self.max_tries = max_tries - self.conn = None - def run(self, query): """Run a RethinkDB query. @@ -45,16 +24,10 @@ class RethinkDBConnection(Connection): :attr:`~.RethinkDBConnection.max_tries`. """ - if self.conn is None: - self._connect() - - for i in range(self.max_tries): - try: - return query.run(self.conn) - except r.ReqlDriverError: - if i + 1 == self.max_tries: - raise - self._connect() + try: + return query.run(self.conn) + except r.ReqlDriverError as exc: + raise OperationError from exc def _connect(self): """Set a connection to RethinkDB. @@ -66,16 +39,7 @@ class RethinkDBConnection(Connection): :attr:`~.RethinkDBConnection.max_tries`. """ - for i in range(1, self.max_tries + 1): - logging.debug('Connecting to database %s:%s/%s. (Attempt %s/%s)', - self.host, self.port, self.dbname, i, self.max_tries) - try: - self.conn = r.connect(host=self.host, port=self.port, db=self.dbname) - except r.ReqlDriverError: - if i == self.max_tries: - raise - wait_time = 2**i - logging.debug('Error connecting to database, waiting %ss', wait_time) - time.sleep(wait_time) - else: - break + try: + return r.connect(host=self.host, port=self.port, db=self.dbname) + except r.ReqlDriverError as exc: + raise ConnectionError from exc diff --git a/bigchaindb/commands/bigchain.py b/bigchaindb/commands/bigchaindb.py similarity index 68% rename from bigchaindb/commands/bigchain.py rename to bigchaindb/commands/bigchaindb.py index c118f857..a46019da 100644 --- a/bigchaindb/commands/bigchain.py +++ b/bigchaindb/commands/bigchaindb.py @@ -3,50 +3,48 @@ the command-line interface (CLI) for BigchainDB Server. 
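The retry logic removed from `RethinkDBConnection` above now lives in the generic `Connection.connect()` added earlier in this diff, so a backend's `_connect()` only has to translate driver errors into `ConnectionError`. A condensed, runnable sketch of that interplay (the stand-in exception and the `FlakyConnection` demo backend are illustrative, not part of the codebase):

```python
import logging
from itertools import repeat

logger = logging.getLogger(__name__)


class ConnectionError(Exception):
    """Stand-in for bigchaindb.backend.exceptions.ConnectionError."""


class Connection:
    """Condensed from the base class introduced in this diff."""

    def __init__(self, max_tries=3):
        self.max_tries = max_tries
        # range() bounds the loop; repeat(0) retries forever when
        # max_tries == 0, as in the diff.
        self.max_tries_counter = (range(max_tries) if max_tries != 0
                                  else repeat(0))
        self._conn = None

    @property
    def conn(self):
        # Connect lazily on first access.
        if self._conn is None:
            self.connect()
        return self._conn

    def connect(self):
        attempt = 0
        for _ in self.max_tries_counter:
            attempt += 1
            try:
                self._conn = self._connect()
            except ConnectionError:
                logger.warning('Attempt %s/%s failed.', attempt,
                               self.max_tries if self.max_tries != 0 else '∞')
                if attempt == self.max_tries:
                    raise
            else:
                break


class FlakyConnection(Connection):
    """Hypothetical demo backend: fails twice, then succeeds."""

    fails = 2

    def _connect(self):
        if self.fails > 0:
            self.fails -= 1
            raise ConnectionError('connection refused')
        return object()  # a real backend returns a driver connection


conn = FlakyConnection().conn  # succeeds on the third attempt
```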
""" import os -import sys import logging import argparse import copy import json -import builtins - -import logstats +import sys from bigchaindb.common import crypto from bigchaindb.common.exceptions import (StartupError, DatabaseAlreadyExists, - KeypairNotFoundException) + KeypairNotFoundException, + DatabaseDoesNotExist) import bigchaindb -import bigchaindb.config_utils -from bigchaindb.models import Transaction -from bigchaindb.utils import ProcessGroup -from bigchaindb import backend +from bigchaindb import backend, processes from bigchaindb.backend import schema from bigchaindb.backend.admin import (set_replicas, set_shards, add_replicas, remove_replicas) from bigchaindb.backend.exceptions import OperationError from bigchaindb.commands import utils -from bigchaindb import processes +from bigchaindb.commands.messages import ( + CANNOT_START_KEYPAIR_NOT_FOUND, + RETHINKDB_STARTUP_ERROR, +) +from bigchaindb.commands.utils import ( + configure_bigchaindb, start_logging_process, input_on_stderr) logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -# We need this because `input` always prints on stdout, while it should print -# to stderr. It's a very old bug, check it out here: -# - https://bugs.python.org/issue1927 -def input_on_stderr(prompt=''): - print(prompt, end='', file=sys.stderr) - return builtins.input() +# Note about printing: +# We try to print to stdout for results of a command that may be useful to +# someone (or another program). Strictly informational text, or errors, +# should be printed to stderr. +@configure_bigchaindb def run_show_config(args): """Show the current configuration""" # TODO Proposal: remove the "hidden" configuration. Only show config. If # the system needs to be configured, then display information on how to # configure the system. - bigchaindb.config_utils.autoconfigure(filename=args.config, force=True) config = copy.deepcopy(bigchaindb.config) del config['CONFIGURED'] private_key = config['keypair']['private'] @@ -89,27 +87,26 @@ def run_configure(args, skip_if_exists=False): # select the correct config defaults based on the backend print('Generating default configuration for backend {}' - .format(args.backend)) + .format(args.backend), file=sys.stderr) + database_keys = bigchaindb._database_keys_map[args.backend] conf['database'] = bigchaindb._database_map[args.backend] if not args.yes: for key in ('bind', ): val = conf['server'][key] - conf['server'][key] = \ - input_on_stderr('API Server {}? (default `{}`): '.format(key, val)) \ - or val + conf['server'][key] = input_on_stderr('API Server {}? (default `{}`): '.format(key, val), val) - for key in ('host', 'port', 'name'): + for key in ('host', 'port'): + val = conf['wsserver'][key] + conf['wsserver'][key] = input_on_stderr('WebSocket Server {}? (default `{}`): '.format(key, val), val) + + for key in database_keys: val = conf['database'][key] - conf['database'][key] = \ - input_on_stderr('Database {}? (default `{}`): '.format(key, val)) \ - or val + conf['database'][key] = input_on_stderr('Database {}? (default `{}`): '.format(key, val), val) val = conf['backlog_reassign_delay'] - conf['backlog_reassign_delay'] = \ - input_on_stderr(('Stale transaction reassignment delay (in ' - 'seconds)? (default `{}`): '.format(val))) \ - or val + conf['backlog_reassign_delay'] = input_on_stderr( + 'Stale transaction reassignment delay (in seconds)? 
(default `{}`): '.format(val), val) if config_path != '-': bigchaindb.config_utils.write_config(conf, config_path) @@ -119,11 +116,10 @@ def run_configure(args, skip_if_exists=False): print('Ready to go!', file=sys.stderr) +@configure_bigchaindb def run_export_my_pubkey(args): """Export this node's public key to standard output """ - logger.debug('bigchaindb args = {}'.format(args)) - bigchaindb.config_utils.autoconfigure(filename=args.config, force=True) pubkey = bigchaindb.config['keypair']['public'] if pubkey is not None: print(pubkey) @@ -141,14 +137,13 @@ def _run_init(): schema.init_database(connection=b.connection) - logger.info('Create genesis block.') b.create_genesis_block() - logger.info('Done, have fun!') + logger.info('Genesis block created.') +@configure_bigchaindb def run_init(args): """Initialize the database""" - bigchaindb.config_utils.autoconfigure(filename=args.config, force=True) # TODO Provide mechanism to: # 1. prompt the user to inquire whether they wish to drop the db # 2. force the init, (e.g., via -f flag) @@ -159,9 +154,9 @@ def run_init(args): print('If you wish to re-initialize it, first drop it.', file=sys.stderr) +@configure_bigchaindb def run_drop(args): """Drop the database""" - bigchaindb.config_utils.autoconfigure(filename=args.config, force=True) dbname = bigchaindb.config['database']['name'] if not args.yes: @@ -171,14 +166,17 @@ def run_drop(args): conn = backend.connect() dbname = bigchaindb.config['database']['name'] - schema.drop_database(conn, dbname) + try: + schema.drop_database(conn, dbname) + except DatabaseDoesNotExist: + print("Cannot drop '{name}'. The database does not exist.".format(name=dbname), file=sys.stderr) +@configure_bigchaindb +@start_logging_process def run_start(args): """Start the processes to run the node""" - logger.info('BigchainDB Version {}'.format(bigchaindb.__version__)) - - bigchaindb.config_utils.autoconfigure(filename=args.config, force=True) + logger.info('BigchainDB Version %s', bigchaindb.__version__) if args.allow_temp_keypair: if not (bigchaindb.config['keypair']['private'] or @@ -194,7 +192,7 @@ def run_start(args): try: proc = utils.start_rethinkdb() except StartupError as e: - sys.exit('Error starting RethinkDB, reason is: {}'.format(e)) + sys.exit(RETHINKDB_STARTUP_ERROR.format(e)) logger.info('RethinkDB started with PID %s' % proc.pid) try: @@ -202,87 +200,55 @@ def run_start(args): except DatabaseAlreadyExists: pass except KeypairNotFoundException: - sys.exit("Can't start BigchainDB, no keypair found. 
" - 'Did you run `bigchaindb configure`?') + sys.exit(CANNOT_START_KEYPAIR_NOT_FOUND) logger.info('Starting BigchainDB main process with public key %s', bigchaindb.config['keypair']['public']) processes.start() -def _run_load(tx_left, stats): - logstats.thread.start(stats) - b = bigchaindb.Bigchain() - - while True: - tx = Transaction.create([b.me], [([b.me], 1)]) - tx = tx.sign([b.me_private]) - b.write_transaction(tx) - - stats['transactions'] += 1 - - if tx_left is not None: - tx_left -= 1 - if tx_left == 0: - break - - -def run_load(args): - bigchaindb.config_utils.autoconfigure(filename=args.config, force=True) - logger.info('Starting %s processes', args.multiprocess) - stats = logstats.Logstats() - logstats.thread.start(stats) - - tx_left = None - if args.count > 0: - tx_left = int(args.count / args.multiprocess) - - workers = ProcessGroup(concurrency=args.multiprocess, - target=_run_load, - args=(tx_left, stats.get_child())) - workers.start() - - +@configure_bigchaindb def run_set_shards(args): conn = backend.connect() try: set_shards(conn, shards=args.num_shards) except OperationError as e: - logger.warn(e) + sys.exit(str(e)) +@configure_bigchaindb def run_set_replicas(args): conn = backend.connect() try: set_replicas(conn, replicas=args.num_replicas) except OperationError as e: - logger.warn(e) + sys.exit(str(e)) +@configure_bigchaindb def run_add_replicas(args): # Note: This command is specific to MongoDB - bigchaindb.config_utils.autoconfigure(filename=args.config, force=True) conn = backend.connect() try: add_replicas(conn, args.replicas) except (OperationError, NotImplementedError) as e: - logger.warn(e) + sys.exit(str(e)) else: - logger.info('Added {} to the replicaset.'.format(args.replicas)) + print('Added {} to the replicaset.'.format(args.replicas)) +@configure_bigchaindb def run_remove_replicas(args): # Note: This command is specific to MongoDB - bigchaindb.config_utils.autoconfigure(filename=args.config, force=True) conn = backend.connect() try: remove_replicas(conn, args.replicas) except (OperationError, NotImplementedError) as e: - logger.warn(e) + sys.exit(str(e)) else: - logger.info('Removed {} from the replicaset.'.format(args.replicas)) + print('Removed {} from the replicaset.'.format(args.replicas)) def create_parser(): @@ -290,16 +256,6 @@ def create_parser(): description='Control your BigchainDB node.', parents=[utils.base_parser]) - parser.add_argument('--dev-start-rethinkdb', - dest='start_rethinkdb', - action='store_true', - help='Run RethinkDB on start') - - parser.add_argument('--dev-allow-temp-keypair', - dest='allow_temp_keypair', - action='store_true', - help='Generate a random keypair on start') - # all the commands are contained in the subparsers object, # the command selected by the user will be stored in `args.command` # that is used by the `main` function to select which other @@ -331,8 +287,18 @@ def create_parser(): help='Drop the database') # parser for starting BigchainDB - subparsers.add_parser('start', - help='Start BigchainDB') + start_parser = subparsers.add_parser('start', + help='Start BigchainDB') + + start_parser.add_argument('--dev-allow-temp-keypair', + dest='allow_temp_keypair', + action='store_true', + help='Generate a random keypair on start') + + start_parser.add_argument('--dev-start-rethinkdb', + dest='start_rethinkdb', + action='store_true', + help='Run RethinkDB on start') # parser for configuring the number of shards sharding_parser = subparsers.add_parser('set-shards', @@ -375,25 +341,6 @@ def create_parser(): help='A list 
of space separated hosts to ' 'remove from the replicaset. Each host ' 'should be in the form `host:port`.') - - load_parser = subparsers.add_parser('load', - help='Write transactions to the backlog') - - load_parser.add_argument('-m', '--multiprocess', - nargs='?', - type=int, - default=False, - help='Spawn multiple processes to run the command, ' - 'if no value is provided, the number of processes ' - 'is equal to the number of cores of the host machine') - - load_parser.add_argument('-c', '--count', - default=0, - type=int, - help='Number of transactions to push. If the parameter -m ' - 'is set, the count is distributed equally to all the ' - 'processes') - return parser diff --git a/bigchaindb/commands/messages.py b/bigchaindb/commands/messages.py new file mode 100644 index 00000000..c65fe973 --- /dev/null +++ b/bigchaindb/commands/messages.py @@ -0,0 +1,10 @@ +"""Module to store messages used in commands, such as error messages, +warnings, prompts, etc. + +""" +CANNOT_START_KEYPAIR_NOT_FOUND = ( + "Can't start BigchainDB, no keypair found. " + 'Did you run `bigchaindb configure`?' +) + +RETHINKDB_STARTUP_ERROR = 'Error starting RethinkDB, reason is: {}' diff --git a/bigchaindb/commands/utils.py b/bigchaindb/commands/utils.py index 80ee7a6b..d6840d68 100644 --- a/bigchaindb/commands/utils.py +++ b/bigchaindb/commands/utils.py @@ -3,18 +3,126 @@ for ``argparse.ArgumentParser``. """ import argparse +import builtins +import functools import multiprocessing as mp import subprocess +import sys import rethinkdb as r from pymongo import uri_parser import bigchaindb +import bigchaindb.config_utils from bigchaindb import backend from bigchaindb.common.exceptions import StartupError +from bigchaindb.log.setup import setup_logging from bigchaindb.version import __version__ +def configure_bigchaindb(command): + """Decorator to be used by command line functions, such that the + configuration of bigchaindb is performed before the execution of + the command. + + Args: + command: The command to decorate. + + Returns: + The command wrapper function. + + """ + @functools.wraps(command) + def configure(args): + try: + config_from_cmdline = { + 'log': { + 'level_console': args.log_level, + 'level_logfile': args.log_level, + }, + 'server': {'loglevel': args.log_level}, + } + except AttributeError: + config_from_cmdline = None + bigchaindb.config_utils.autoconfigure( + filename=args.config, config=config_from_cmdline, force=True) + command(args) + + return configure + + +def start_logging_process(command): + """Decorator to start the logging subscriber process. + + Args: + command: The command to decorate. + + Returns: + The command wrapper function. + + .. important:: + + Configuration, if needed, should be applied before invoking this + decorator, as starting the subscriber process for logging will + configure the root logger for the child process based on the + state of :obj:`bigchaindb.config` at the moment this decorator + is invoked. 
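    Example (the decorator stacking used by ``run_start`` elsewhere in
    this diff; ``configure_bigchaindb`` is applied on top, so the
    configuration is in place before the logging subscriber starts)::

        @configure_bigchaindb
        @start_logging_process
        def run_start(args):
            ...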
+ + """ + @functools.wraps(command) + def start_logging(args): + from bigchaindb import config + setup_logging(user_log_config=config.get('log')) + command(args) + return start_logging + + +def _convert(value, default=None, convert=None): + def convert_bool(value): + if value.lower() in ('true', 't', 'yes', 'y'): + return True + if value.lower() in ('false', 'f', 'no', 'n'): + return False + raise ValueError('{} cannot be converted to bool'.format(value)) + + if value == '': + value = None + + if convert is None: + if default is not None: + convert = type(default) + else: + convert = str + + if convert == bool: + convert = convert_bool + + if value is None: + return default + else: + return convert(value) + + +# We need this because `input` always prints on stdout, while it should print +# to stderr. It's a very old bug, check it out here: +# - https://bugs.python.org/issue1927 +def input_on_stderr(prompt='', default=None, convert=None): + """Output a string to stderr and wait for input. + + Args: + prompt (str): the message to display. + default: the default value to return if the user + leaves the field empty + convert (callable): a callable to be used to convert + the value the user inserted. If None, the type of + ``default`` will be used. + """ + + print(prompt, end='', file=sys.stderr) + value = builtins.input() + return _convert(value, default, convert) + + def start_rethinkdb(): """Start RethinkDB as a child process and wait for it to be available. @@ -130,6 +238,12 @@ base_parser.add_argument('-c', '--config', help='Specify the location of the configuration file ' '(use "-" for stdout)') +base_parser.add_argument('-l', '--log-level', + type=str.upper, # convert to uppercase for comparison to choices + choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], + default='INFO', + help='Log level') + base_parser.add_argument('-y', '--yes', '--yes-please', action='store_true', help='Assume "yes" as answer to all prompts and run ' diff --git a/bigchaindb/common/crypto.py b/bigchaindb/common/crypto.py index acce02d9..99663fe9 100644 --- a/bigchaindb/common/crypto.py +++ b/bigchaindb/common/crypto.py @@ -1,18 +1,31 @@ # Separate all crypto code so that we can easily test several implementations +from collections import namedtuple import sha3 from cryptoconditions import crypto +CryptoKeypair = namedtuple('CryptoKeypair', ('private_key', 'public_key')) + + def hash_data(data): """Hash the provided data using SHA3-256""" return sha3.sha3_256(data.encode()).hexdigest() def generate_key_pair(): + """Generates a cryptographic key pair. + + Returns: + :class:`~bigchaindb.common.crypto.CryptoKeypair`: A + :obj:`collections.namedtuple` with named fields + :attr:`~bigchaindb.common.crypto.CryptoKeypair.private_key` and + :attr:`~bigchaindb.common.crypto.CryptoKeypair.public_key`. 
+ + """ # TODO FOR CC: Adjust interface so that this function becomes unnecessary - private_key, public_key = crypto.ed25519_generate_key_pair() - return private_key.decode(), public_key.decode() + return CryptoKeypair( + *(k.decode() for k in crypto.ed25519_generate_key_pair())) PrivateKey = crypto.Ed25519SigningKey diff --git a/bigchaindb/common/exceptions.py b/bigchaindb/common/exceptions.py index 60340492..258001b8 100644 --- a/bigchaindb/common/exceptions.py +++ b/bigchaindb/common/exceptions.py @@ -7,40 +7,6 @@ class ConfigurationError(BigchainDBError): """Raised when there is a problem with server configuration""" -class OperationError(BigchainDBError): - """Raised when an operation cannot go through""" - - -class TransactionDoesNotExist(BigchainDBError): - """Raised if the transaction is not in the database""" - - -class TransactionOwnerError(BigchainDBError): - """Raised if a user tries to transfer a transaction they don't own""" - - -class DoubleSpend(BigchainDBError): - """Raised if a double spend is found""" - - -class ValidationError(BigchainDBError): - """Raised if there was an error in validation""" - - -class InvalidHash(ValidationError): - """Raised if there was an error checking the hash for a particular - operation""" - - -class SchemaValidationError(ValidationError): - """Raised if there was any error validating an object's schema""" - - -class InvalidSignature(BigchainDBError): - """Raised if there was an error checking the signature for a particular - operation""" - - class DatabaseAlreadyExists(BigchainDBError): """Raised when trying to create the database but the db is already there""" @@ -49,6 +15,18 @@ class DatabaseDoesNotExist(BigchainDBError): """Raised when trying to delete the database but the db is not there""" +class StartupError(BigchainDBError): + """Raised when there is an error starting up the system""" + + +class GenesisBlockAlreadyExistsError(BigchainDBError): + """Raised when trying to create the already existing genesis block""" + + +class CyclicBlockchainError(BigchainDBError): + """Raised when there is a cycle in the blockchain""" + + class KeypairNotFoundException(BigchainDBError): """Raised if operation cannot proceed because the keypair was not given""" @@ -58,34 +36,73 @@ class KeypairMismatchException(BigchainDBError): current owner(s)""" -class StartupError(BigchainDBError): - """Raised when there is an error starting up the system""" +class OperationError(BigchainDBError): + """Raised when an operation cannot go through""" -class ImproperVoteError(BigchainDBError): +################################################################################ +# Validation errors +# +# All validation errors (which are handleable errors, not faults) should +# subclass ValidationError. However, where possible they should also have their +# own distinct type to differentiate them from other validation errors, +# especially for the purposes of testing. 
+ + +class ValidationError(BigchainDBError): + """Raised if there was an error in validation""" + + +class DoubleSpend(ValidationError): + """Raised if a double spend is found""" + + +class InvalidHash(ValidationError): + """Raised if there was an error checking the hash for a particular + operation""" + + +class SchemaValidationError(ValidationError): + """Raised if there was any error validating an object's schema""" + + +class InvalidSignature(ValidationError): + """Raised if there was an error checking the signature for a particular + operation""" + + +class ImproperVoteError(ValidationError): """Raised if a vote is not constructed correctly, or signed incorrectly""" -class MultipleVotesError(BigchainDBError): +class MultipleVotesError(ValidationError): """Raised if a voter has voted more than once""" -class GenesisBlockAlreadyExistsError(BigchainDBError): - """Raised when trying to create the already existing genesis block""" - - -class CyclicBlockchainError(BigchainDBError): - """Raised when there is a cycle in the blockchain""" - - -class TransactionNotInValidBlock(BigchainDBError): +class TransactionNotInValidBlock(ValidationError): """Raised when a transfer transaction is attempting to fulfill the outputs of a transaction that is in an invalid or undecided block""" -class AssetIdMismatch(BigchainDBError): +class AssetIdMismatch(ValidationError): """Raised when multiple transaction inputs related to different assets""" -class AmountError(BigchainDBError): +class AmountError(ValidationError): """Raised when there is a problem with a transaction's output amounts""" + + +class InputDoesNotExist(ValidationError): + """Raised if a transaction input does not exist""" + + +class TransactionOwnerError(ValidationError): + """Raised if a user tries to transfer a transaction they don't own""" + + +class SybilError(ValidationError): + """If a block or vote comes from an unidentifiable node""" + + +class DuplicateTransaction(ValidationError): + """Raised if a duplicated transaction is found""" diff --git a/bigchaindb/common/schema/__init__.py b/bigchaindb/common/schema/__init__.py index 52c70c13..a69793ad 100644 --- a/bigchaindb/common/schema/__init__.py +++ b/bigchaindb/common/schema/__init__.py @@ -28,7 +28,9 @@ def _load_schema(name): return path, schema -TX_SCHEMA_PATH, TX_SCHEMA = _load_schema('transaction') +TX_SCHEMA_PATH, TX_SCHEMA_COMMON = _load_schema('transaction') +_, TX_SCHEMA_CREATE = _load_schema('transaction_create') +_, TX_SCHEMA_TRANSFER = _load_schema('transaction_transfer') VOTE_SCHEMA_PATH, VOTE_SCHEMA = _load_schema('vote') @@ -41,8 +43,17 @@ def _validate_schema(schema, body): def validate_transaction_schema(tx): - """ Validate a transaction dict """ - _validate_schema(TX_SCHEMA, tx) + """ + Validate a transaction dict. + + TX_SCHEMA_COMMON contains properties that are common to all types of + transaction. TX_SCHEMA_[TRANSFER|CREATE] add additional constraints on top. 
+ """ + _validate_schema(TX_SCHEMA_COMMON, tx) + if tx['operation'] == 'TRANSFER': + _validate_schema(TX_SCHEMA_TRANSFER, tx) + else: + _validate_schema(TX_SCHEMA_CREATE, tx) def validate_vote_schema(vote): diff --git a/bigchaindb/common/schema/transaction_create.yaml b/bigchaindb/common/schema/transaction_create.yaml new file mode 100644 index 00000000..2383a102 --- /dev/null +++ b/bigchaindb/common/schema/transaction_create.yaml @@ -0,0 +1,28 @@ +--- +"$schema": "http://json-schema.org/draft-04/schema#" +type: object +title: Transaction Schema - CREATE/GENESIS specific constraints +required: +- asset +- inputs +properties: + asset: + additionalProperties: false + properties: + data: + anyOf: + - type: object + additionalProperties: true + - type: 'null' + inputs: + type: array + title: "Transaction inputs" + maxItems: 1 + minItems: 1 + items: + type: "object" + required: + - fulfills + properties: + fulfills: + type: "null" diff --git a/bigchaindb/common/schema/transaction_transfer.yaml b/bigchaindb/common/schema/transaction_transfer.yaml new file mode 100644 index 00000000..09a5aa1b --- /dev/null +++ b/bigchaindb/common/schema/transaction_transfer.yaml @@ -0,0 +1,29 @@ +--- +"$schema": "http://json-schema.org/draft-04/schema#" +type: object +title: Transaction Schema - TRANSFER specific properties +required: +- asset +properties: + asset: + additionalProperties: false + properties: + id: + "$ref": "#/definitions/sha3_hexdigest" + description: | + ID of the transaction that created the asset. + inputs: + type: array + title: "Transaction inputs" + minItems: 1 + items: + type: "object" + required: + - fulfills + properties: + fulfills: + type: "object" +definitions: + sha3_hexdigest: + pattern: "[0-9a-f]{64}" + type: string diff --git a/bigchaindb/common/transaction.py b/bigchaindb/common/transaction.py index ebef6987..285272c5 100644 --- a/bigchaindb/common/transaction.py +++ b/bigchaindb/common/transaction.py @@ -757,20 +757,19 @@ class Transaction(object): key_pairs (dict): The keys to sign the Transaction with. """ input_ = deepcopy(input_) - for owner_before in input_.owners_before: - try: - # TODO: CC should throw a KeypairMismatchException, instead of - # our manual mapping here + for owner_before in set(input_.owners_before): + # TODO: CC should throw a KeypairMismatchException, instead of + # our manual mapping here - # TODO FOR CC: Naming wise this is not so smart, - # `get_subcondition` in fact doesn't return a - # condition but a fulfillment + # TODO FOR CC: Naming wise this is not so smart, + # `get_subcondition` in fact doesn't return a + # condition but a fulfillment - # TODO FOR CC: `get_subcondition` is singular. One would not - # expect to get a list back. - ccffill = input_.fulfillment - subffill = ccffill.get_subcondition_from_vk(owner_before)[0] - except IndexError: + # TODO FOR CC: `get_subcondition` is singular. One would not + # expect to get a list back. + ccffill = input_.fulfillment + subffills = ccffill.get_subcondition_from_vk(owner_before) + if not subffills: raise KeypairMismatchException('Public key {} cannot be found ' 'in the fulfillment' .format(owner_before)) @@ -783,7 +782,8 @@ class Transaction(object): # cryptoconditions makes no assumptions of the encoding of the # message to sign or verify. 
It only accepts bytestrings - subffill.sign(message.encode(), private_key) + for subffill in subffills: + subffill.sign(message.encode(), private_key) return input_ def inputs_valid(self, outputs=None): @@ -983,7 +983,8 @@ class Transaction(object): transactions = [transactions] # create a set of the transactions' asset ids - asset_ids = {tx.id if tx.operation == Transaction.CREATE else tx.asset['id'] + asset_ids = {tx.id if tx.operation == Transaction.CREATE + else tx.asset['id'] for tx in transactions} # check that all the transasctions have the same asset id @@ -993,7 +994,7 @@ class Transaction(object): return asset_ids.pop() @staticmethod - def validate_structure(tx_body): + def validate_id(tx_body): """Validate the transaction ID of a transaction Args: @@ -1025,7 +1026,7 @@ class Transaction(object): Returns: :class:`~bigchaindb.common.transaction.Transaction` """ - cls.validate_structure(tx) + cls.validate_id(tx) inputs = [Input.from_dict(input_) for input_ in tx['inputs']] outputs = [Output.from_dict(output) for output in tx['outputs']] return cls(tx['operation'], tx['asset'], inputs, outputs, diff --git a/bigchaindb/config_utils.py b/bigchaindb/config_utils.py index 87a25d3f..5a72a7d6 100644 --- a/bigchaindb/config_utils.py +++ b/bigchaindb/config_utils.py @@ -220,11 +220,14 @@ def write_config(config, filename=None): json.dump(config, f, indent=4) +def is_configured(): + return bool(bigchaindb.config.get('CONFIGURED')) + + def autoconfigure(filename=None, config=None, force=False): """Run ``file_config`` and ``env_config`` if the module has not been initialized.""" - - if not force and bigchaindb.config.get('CONFIGURED'): + if not force and is_configured(): logger.debug('System already configured, skipping autoconfiguration') return diff --git a/bigchaindb/consensus.py b/bigchaindb/consensus.py index 0e7dc4bd..a0672577 100644 --- a/bigchaindb/consensus.py +++ b/bigchaindb/consensus.py @@ -1,11 +1,4 @@ -import logging - -from bigchaindb.utils import verify_vote_signature -from bigchaindb.common.schema import (SchemaValidationError, - validate_vote_schema) - - -logger = logging.getLogger(__name__) +from bigchaindb.voting import Voting class BaseConsensusRules(): @@ -16,34 +9,15 @@ class BaseConsensusRules(): All methods listed below must be implemented. """ + voting = Voting @staticmethod def validate_transaction(bigchain, transaction): """See :meth:`bigchaindb.models.Transaction.validate` - for documentation. - - """ + for documentation.""" return transaction.validate(bigchain) @staticmethod def validate_block(bigchain, block): """See :meth:`bigchaindb.models.Block.validate` for documentation.""" return block.validate(bigchain) - - @staticmethod - def verify_vote(voters, signed_vote): - """Verify the signature of a vote. - - Refer to the documentation of - :func:`bigchaindb.utils.verify_signature`. 
- """ - if verify_vote_signature(voters, signed_vote): - try: - validate_vote_schema(signed_vote) - return True - except SchemaValidationError as exc: - logger.warning(exc) - else: - logger.warning('Vote failed signature verification: ' - '%s with voters: %s', signed_vote, voters) - return False diff --git a/bigchaindb/core.py b/bigchaindb/core.py index 9f93d47a..5d2e9c03 100644 --- a/bigchaindb/core.py +++ b/bigchaindb/core.py @@ -1,9 +1,7 @@ import random -import math -import collections from time import time -from itertools import compress +from bigchaindb import exceptions as core_exceptions from bigchaindb.common import crypto, exceptions from bigchaindb.common.utils import gen_timestamp, serialize from bigchaindb.common.transaction import TransactionLink @@ -11,7 +9,6 @@ from bigchaindb.common.transaction import TransactionLink import bigchaindb from bigchaindb import backend, config_utils, utils -from bigchaindb.backend import exceptions as backend_exceptions from bigchaindb.consensus import BaseConsensusRules from bigchaindb.models import Block, Transaction @@ -22,14 +19,17 @@ class Bigchain(object): Create, read, sign, write transactions to the database """ - # return if a block has been voted invalid BLOCK_INVALID = 'invalid' - # return if a block is valid, or tx is in valid block + """return if a block has been voted invalid""" + BLOCK_VALID = TX_VALID = 'valid' - # return if block is undecided, or tx is in undecided block + """return if a block is valid, or tx is in valid block""" + BLOCK_UNDECIDED = TX_UNDECIDED = 'undecided' - # return if transaction is in backlog + """return if block is undecided, or tx is in undecided block""" + TX_IN_BACKLOG = 'backlog' + """return if transaction is in backlog""" def __init__(self, public_key=None, private_key=None, keyring=[], connection=None, backlog_reassign_delay=None): """Initialize the Bigchain instance @@ -72,6 +72,9 @@ class Bigchain(object): if not self.me or not self.me_private: raise exceptions.KeypairNotFoundException() + federation = property(lambda self: set(self.nodes_except_me + [self.me])) + """ Set of federation member public keys """ + def write_transaction(self, signed_transaction): """Write the transaction to bigchain. @@ -110,19 +113,10 @@ class Bigchain(object): dict: database response or None if no reassignment is possible """ - if self.nodes_except_me: - try: - federation_nodes = self.nodes_except_me + [self.me] - index_current_assignee = federation_nodes.index(transaction['assignee']) - new_assignee = random.choice(federation_nodes[:index_current_assignee] + - federation_nodes[index_current_assignee + 1:]) - except ValueError: - # current assignee not in federation - new_assignee = random.choice(self.nodes_except_me) - - else: - # There is no other node to assign to - new_assignee = self.me + other_nodes = tuple( + self.federation.difference([transaction['assignee']]) + ) + new_assignee = random.choice(other_nodes) if other_nodes else self.me return backend.query.update_transaction( self.connection, transaction['id'], @@ -162,31 +156,6 @@ class Bigchain(object): return self.consensus.validate_transaction(self, transaction) - def is_valid_transaction(self, transaction): - """Check whether a transaction is valid or invalid. - - Similar to :meth:`~bigchaindb.Bigchain.validate_transaction` - but never raises an exception. It returns :obj:`False` if - the transaction is invalid. - - Args: - transaction (:Class:`~bigchaindb.models.Transaction`): transaction - to check. 
- - Returns: - The :class:`~bigchaindb.models.Transaction` instance if valid, - otherwise :obj:`False`. - """ - - try: - return self.validate_transaction(transaction) - except (ValueError, exceptions.OperationError, - exceptions.TransactionDoesNotExist, - exceptions.TransactionOwnerError, exceptions.DoubleSpend, - exceptions.InvalidHash, exceptions.InvalidSignature, - exceptions.TransactionNotInValidBlock, exceptions.AmountError): - return False - def is_new_transaction(self, txid, exclude_block_id=None): """ Return True if the transaction does not exist in any @@ -219,8 +188,7 @@ class Bigchain(object): if include_status: if block: - status = self.block_election_status(block_id, - block['block']['voters']) + status = self.block_election_status(block) return block, status else: return block @@ -321,19 +289,15 @@ class Bigchain(object): blocks = backend.query.get_blocks_status_from_transaction(self.connection, txid) if blocks: # Determine the election status of each block - validity = { - block['id']: self.block_election_status( - block['id'], - block['block']['voters'] - ) for block in blocks - } + validity = {block['id']: self.block_election_status(block) + for block in blocks} # NOTE: If there are multiple valid blocks with this transaction, # something has gone wrong if list(validity.values()).count(Bigchain.BLOCK_VALID) > 1: block_ids = str([block for block in validity if validity[block] == Bigchain.BLOCK_VALID]) - raise backend_exceptions.BigchainDBCritical( + raise core_exceptions.CriticalDoubleInclusion( 'Transaction {tx} is present in ' 'multiple valid blocks: {block_ids}' .format(tx=txid, block_ids=block_ids)) @@ -360,44 +324,57 @@ class Bigchain(object): def get_spent(self, txid, output): """Check if a `txid` was already used as an input. - A transaction can be used as an input for another transaction. Bigchain needs to make sure that a - given `txid` is only used once. + A transaction can be used as an input for another transaction. Bigchain + needs to make sure that a given `(txid, output)` is only used once. + + This method will check if the `(txid, output)` has already been + spent in a transaction that is in either the `VALID`, `UNDECIDED` or + `BACKLOG` state. Args: txid (str): The id of the transaction output (num): the index of the output in the respective transaction Returns: - The transaction (Transaction) that used the `txid` as an input else - `None` + The transaction (Transaction) that used the `(txid, output)` as an + input else `None` + + Raises: + CriticalDoubleSpend: If the given `(txid, output)` was spent in + more than one valid transaction. """ # checks if an input was already spent # checks if the bigchain has any transaction with input {'txid': ..., # 'output': ...} - transactions = list(backend.query.get_spent(self.connection, txid, output)) + transactions = list(backend.query.get_spent(self.connection, txid, + output)) # a transaction_id should have been spent at most one time - if transactions: - # determine if these valid transactions appear in more than one valid block - num_valid_transactions = 0 - for transaction in transactions: - # ignore invalid blocks - # FIXME: Isn't there a faster solution than doing I/O again? - if self.get_transaction(transaction['id']): - num_valid_transactions += 1 - if num_valid_transactions > 1: - raise exceptions.DoubleSpend(('`{}` was spent more than' - ' once. 
There is a problem' - ' with the chain') - .format(txid)) + # determine if these valid transactions appear in more than one valid + # block + num_valid_transactions = 0 + non_invalid_transactions = [] + for transaction in transactions: + # ignore transactions in invalid blocks + # FIXME: Isn't there a faster solution than doing I/O again? + _, status = self.get_transaction(transaction['id'], + include_status=True) + if status == self.TX_VALID: + num_valid_transactions += 1 + # `txid` can only have been spent in at most one valid block. + if num_valid_transactions > 1: + raise core_exceptions.CriticalDoubleSpend( + '`{}` was spent more than once. There is a problem' + ' with the chain'.format(txid)) + # if it's not an invalid transaction + if status is not None: + non_invalid_transactions.append(transaction) - if num_valid_transactions: - return Transaction.from_dict(transactions[0]) - else: - # all queried transactions were invalid - return None - else: - return None + if non_invalid_transactions: + return Transaction.from_dict(non_invalid_transactions[0]) + + # Either no transaction was returned spending the `(txid, output)` as + # input or the returned transactions are not valid. def get_outputs(self, owner): """Retrieve a list of links to transaction outputs for a given public @@ -412,33 +389,37 @@ """ # get all transactions in which owner is in the `owners_after` list response = backend.query.get_owned_ids(self.connection, owner) - links = [] + return [ + TransactionLink(tx['id'], index) + for tx in response + if not self.is_tx_strictly_in_invalid_block(tx['id']) + for index, output in enumerate(tx['outputs']) + if utils.output_has_owner(output, owner) + ] - for tx in response: - # disregard transactions from invalid blocks - validity = self.get_blocks_status_containing_tx(tx['id']) - if Bigchain.BLOCK_VALID not in validity.values(): - if Bigchain.BLOCK_UNDECIDED not in validity.values(): - continue + def is_tx_strictly_in_invalid_block(self, txid): + """ + Checks whether the transaction with the given ``txid`` + *strictly* belongs to an invalid block. - # NOTE: It's OK to not serialize the transaction here, as we do not - # use it after the execution of this function. - # a transaction can contain multiple outputs so we need to iterate over all of them - # to get a list of outputs available to spend - for index, output in enumerate(tx['outputs']): - # for simple signature conditions there are no subfulfillments - # check if the owner is in the condition `owners_after` - if len(output['public_keys']) == 1: - if output['condition']['details']['public_key'] == owner: - tx_link = TransactionLink(tx['id'], index) - else: - # for transactions with multiple `public_keys` there will be several subfulfillments nested - # in the condition. We need to iterate the subfulfillments to make sure there is a - # subfulfillment for `owner` - if utils.condition_details_has_owner(output['condition']['details'], owner): - tx_link = TransactionLink(tx['id'], index) - links.append(tx_link) - return links + Args: + txid (str): Transaction id. + + Returns: + bool: ``True`` if the transaction *strictly* belongs to a + block that is invalid. ``False`` otherwise. + + Note: + Since a transaction may be in multiple blocks, with + different statuses, the term "strictly" is used to + emphasize that if a transaction is said to be in an invalid + block, it means that it is not in any other block that is + either valid or undecided.
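The predicate reads naturally as a truth table. A standalone restatement with made-up block ids and the status strings used by `Bigchain`:

```python
def strictly_invalid(validity):
    # Mirrors is_tx_strictly_in_invalid_block: the tx appears in no
    # valid block and in no undecided block.
    return ('valid' not in validity.values() and
            'undecided' not in validity.values())


assert strictly_invalid({'b1': 'invalid'}) is True
assert strictly_invalid({'b1': 'invalid', 'b2': 'undecided'}) is False
assert strictly_invalid({'b1': 'valid', 'b2': 'invalid'}) is False
```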
+ + """ + validity = self.get_blocks_status_containing_tx(txid) + return (Bigchain.BLOCK_VALID not in validity.values() and + Bigchain.BLOCK_UNDECIDED not in validity.values()) def get_owned_ids(self, owner): """Retrieve a list of ``txid`` s that can be used as inputs. @@ -491,7 +472,7 @@ class Bigchain(object): raise exceptions.OperationError('Empty block creation is not ' 'allowed') - voters = self.nodes_except_me + [self.me] + voters = list(self.federation) block = Block(validated_transactions, self.me, gen_timestamp(), voters) block = block.sign(self.me_private) @@ -510,36 +491,20 @@ class Bigchain(object): """ return self.consensus.validate_block(self, block) - def has_previous_vote(self, block_id, voters): + def has_previous_vote(self, block_id): """Check for previous votes from this node Args: block_id (str): the id of the block to check - voters (list(str)): the voters of the block to check Returns: bool: :const:`True` if this block already has a valid vote from this node, :const:`False` otherwise. - Raises: - ImproperVoteError: If there is already a vote, - but the vote is invalid. - """ votes = list(backend.query.get_votes_by_block_id_and_voter(self.connection, block_id, self.me)) - - if len(votes) > 1: - raise exceptions.MultipleVotesError('Block {block_id} has {n_votes} votes from public key {me}' - .format(block_id=block_id, n_votes=str(len(votes)), me=self.me)) - has_previous_vote = False - if votes: - if utils.verify_vote_signature(voters, votes[0]): - has_previous_vote = True - else: - raise exceptions.ImproperVoteError('Block {block_id} already has an incorrectly signed vote ' - 'from public key {me}'.format(block_id=block_id, me=self.me)) - - return has_previous_vote + el, _ = self.consensus.voting.partition_eligible_votes(votes, [self.me]) + return bool(el) def write_block(self, block): """Write a block to bigchain. @@ -639,69 +604,15 @@ class Bigchain(object): # XXX: should this return instaces of Block? return backend.query.get_unvoted_blocks(self.connection, self.me) - def block_election_status(self, block_id, voters): - """Tally the votes on a block, and return the status: valid, invalid, or undecided.""" + def block_election(self, block): + if type(block) != dict: + block = block.to_dict() + votes = list(backend.query.get_votes_by_block_id(self.connection, + block['id'])) + return self.consensus.voting.block_election(block, votes, + self.federation) - votes = list(backend.query.get_votes_by_block_id(self.connection, block_id)) - n_voters = len(voters) - - voter_counts = collections.Counter([vote['node_pubkey'] for vote in votes]) - for node in voter_counts: - if voter_counts[node] > 1: - raise exceptions.MultipleVotesError( - 'Block {block_id} has multiple votes ({n_votes}) from voting node {node_id}' - .format(block_id=block_id, n_votes=str(voter_counts[node]), node_id=node)) - - if len(votes) > n_voters: - raise exceptions.MultipleVotesError('Block {block_id} has {n_votes} votes cast, but only {n_voters} voters' - .format(block_id=block_id, n_votes=str(len(votes)), - n_voters=str(n_voters))) - - # vote_cast is the list of votes e.g. [True, True, False] - vote_cast = [vote['vote']['is_block_valid'] for vote in votes] - # prev_block are the ids of the nominal prev blocks e.g. - # ['block1_id', 'block1_id', 'block2_id'] - prev_block = [vote['vote']['previous_block'] for vote in votes] - # vote_validity checks whether a vote is valid - # or invalid, e.g. 
[False, True, True] - vote_validity = [self.consensus.verify_vote(voters, vote) for vote in votes] - - # element-wise product of stated vote and validity of vote - # vote_cast = [True, True, False] and - # vote_validity = [False, True, True] gives - # [True, False] - # Only the correctly signed votes are tallied. - vote_list = list(compress(vote_cast, vote_validity)) - - # Total the votes. Here, valid and invalid refer - # to the vote cast, not whether the vote itself - # is valid or invalid. - n_valid_votes = sum(vote_list) - n_invalid_votes = len(vote_cast) - n_valid_votes - - # The use of ceiling and floor is to account for the case of an - # even number of voters where half the voters have voted 'invalid' - # and half 'valid'. In this case, the block should be marked invalid - # to avoid a tie. In the case of an odd number of voters this is not - # relevant, since one side must be a majority. - if n_invalid_votes >= math.ceil(n_voters / 2): - return Bigchain.BLOCK_INVALID - elif n_valid_votes > math.floor(n_voters / 2): - # The block could be valid, but we still need to check if votes - # agree on the previous block. - # - # First, only consider blocks with legitimate votes - prev_block_list = list(compress(prev_block, vote_validity)) - # Next, only consider the blocks with 'yes' votes - prev_block_valid_list = list(compress(prev_block_list, vote_list)) - counts = collections.Counter(prev_block_valid_list) - # Make sure the majority vote agrees on previous node. - # The majority vote must be the most common, by definition. - # If it's not, there is no majority agreement on the previous - # block. - if counts.most_common()[0][1] > math.floor(n_voters / 2): - return Bigchain.BLOCK_VALID - else: - return Bigchain.BLOCK_INVALID - else: - return Bigchain.BLOCK_UNDECIDED + def block_election_status(self, block): + """Tally the votes on a block, and return the status: + valid, invalid, or undecided.""" + return self.block_election(block)['status'] diff --git a/bigchaindb/events.py b/bigchaindb/events.py new file mode 100644 index 00000000..bc448ce3 --- /dev/null +++ b/bigchaindb/events.py @@ -0,0 +1,33 @@ +from enum import Enum +from multiprocessing import Queue + + +class EventTypes(Enum): + BLOCK_VALID = 1 + BLOCK_INVALID = 2 + + +class Event: + + def __init__(self, event_type, event_data): + self.type = event_type + self.data = event_data + + +class EventHandler: + + def __init__(self, events_queue): + self.events_queue = events_queue + + def put_event(self, event, timeout=None): + # TODO: handle timeouts + self.events_queue.put(event, timeout=None) + + def get_event(self, timeout=None): + # TODO: handle timeouts + return self.events_queue.get(timeout=None) + + +def setup_events_queue(): + # TODO: set bounds to the queue + return Queue() diff --git a/bigchaindb/exceptions.py b/bigchaindb/exceptions.py index d8a4cd73..a11fd4f8 100644 --- a/bigchaindb/exceptions.py +++ b/bigchaindb/exceptions.py @@ -1,2 +1,14 @@ class BigchainDBError(Exception): """Base class for BigchainDB exceptions.""" + + +class CriticalDoubleSpend(BigchainDBError): + """Data integrity error that requires attention""" + + +class CriticalDoubleInclusion(BigchainDBError): + """Data integrity error that requires attention""" + + +class CriticalDuplicateVote(BigchainDBError): + """Data integrity error that requires attention""" diff --git a/bigchaindb/log/__init__.py b/bigchaindb/log/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/bigchaindb/log/configs.py b/bigchaindb/log/configs.py new file mode 
100644 index 00000000..034256a4 --- /dev/null +++ b/bigchaindb/log/configs.py @@ -0,0 +1,67 @@ +import logging +from logging.handlers import DEFAULT_TCP_LOGGING_PORT +from os.path import expanduser, join + + +DEFAULT_SOCKET_LOGGING_HOST = 'localhost' +DEFAULT_SOCKET_LOGGING_PORT = DEFAULT_TCP_LOGGING_PORT +DEFAULT_SOCKET_LOGGING_ADDR = (DEFAULT_SOCKET_LOGGING_HOST, + DEFAULT_SOCKET_LOGGING_PORT) +DEFAULT_LOG_DIR = expanduser('~') + +PUBLISHER_LOGGING_CONFIG = { + 'version': 1, + 'disable_existing_loggers': False, + 'root': { + 'level': logging.DEBUG, + }, +} + +SUBSCRIBER_LOGGING_CONFIG = { + 'version': 1, + 'disable_existing_loggers': False, + 'formatters': { + 'console': { + 'class': 'logging.Formatter', + 'format': ('[%(asctime)s] [%(levelname)s] (%(name)s) ' + '%(message)s (%(processName)-10s - pid: %(process)d)'), + 'datefmt': '%Y-%m-%d %H:%M:%S', + }, + 'file': { + 'class': 'logging.Formatter', + 'format': ('[%(asctime)s] [%(levelname)s] (%(name)s) ' + '%(message)s (%(processName)-10s - pid: %(process)d)'), + 'datefmt': '%Y-%m-%d %H:%M:%S', + }, + }, + 'handlers': { + 'console': { + 'class': 'logging.StreamHandler', + 'formatter': 'console', + 'level': logging.INFO, + }, + 'file': { + 'class': 'logging.handlers.RotatingFileHandler', + 'filename': join(DEFAULT_LOG_DIR, 'bigchaindb.log'), + 'mode': 'w', + 'maxBytes': 209715200, + 'backupCount': 5, + 'formatter': 'file', + 'level': logging.INFO, + }, + 'errors': { + 'class': 'logging.handlers.RotatingFileHandler', + 'filename': join(DEFAULT_LOG_DIR, 'bigchaindb-errors.log'), + 'mode': 'w', + 'maxBytes': 209715200, + 'backupCount': 5, + 'formatter': 'file', + 'level': logging.ERROR, + }, + }, + 'loggers': {}, + 'root': { + 'level': logging.DEBUG, + 'handlers': ['console', 'file', 'errors'] + }, +} diff --git a/bigchaindb/log/loggers.py b/bigchaindb/log/loggers.py new file mode 100644 index 00000000..f8c18320 --- /dev/null +++ b/bigchaindb/log/loggers.py @@ -0,0 +1,32 @@ +import logging.handlers + +from gunicorn.glogging import Logger + +from .configs import DEFAULT_SOCKET_LOGGING_HOST, DEFAULT_SOCKET_LOGGING_PORT + + +class HttpServerLogger(Logger): + """Custom logger class for ``gunicorn`` logs. + + Meant for internal usage only, to set the ``logger_class`` + configuration setting on gunicorn. + + """ + def setup(self, cfg): + """Set up the gunicorn access and error loggers. This overrides + the parent method. Its main goal is to simply pipe all the logs to + the TCP socket used throughout BigchainDB. + + Args: + cfg (:obj:`gunicorn.config.Config`): Gunicorn configuration + object. *Ignored*.
+ + """ + self._set_socklog_handler(self.error_log) + self._set_socklog_handler(self.access_log) + + def _set_socklog_handler(self, log): + socket_handler = logging.handlers.SocketHandler( + DEFAULT_SOCKET_LOGGING_HOST, DEFAULT_SOCKET_LOGGING_PORT) + socket_handler._gunicorn = True + log.addHandler(socket_handler) diff --git a/bigchaindb/log/setup.py b/bigchaindb/log/setup.py new file mode 100644 index 00000000..b6b45b00 --- /dev/null +++ b/bigchaindb/log/setup.py @@ -0,0 +1,178 @@ +"""Setup logging.""" +from copy import deepcopy +import logging +from logging.config import dictConfig +import logging.handlers +import pickle +from socketserver import StreamRequestHandler, ThreadingTCPServer +import struct +import sys +from multiprocessing import Process + +from .configs import ( + DEFAULT_SOCKET_LOGGING_HOST, + DEFAULT_SOCKET_LOGGING_PORT, + PUBLISHER_LOGGING_CONFIG, + SUBSCRIBER_LOGGING_CONFIG, +) +from bigchaindb.common.exceptions import ConfigurationError + + +def _normalize_log_level(level): + try: + return level.upper() + except AttributeError as exc: + raise ConfigurationError('Log level must be a string!') from exc + + +def setup_pub_logger(): + dictConfig(PUBLISHER_LOGGING_CONFIG) + socket_handler = logging.handlers.SocketHandler( + DEFAULT_SOCKET_LOGGING_HOST, DEFAULT_SOCKET_LOGGING_PORT) + socket_handler.setLevel(logging.DEBUG) + logger = logging.getLogger() + logger.addHandler(socket_handler) + + +def setup_sub_logger(*, user_log_config=None): + server = LogRecordSocketServer() + with server: + server_proc = Process( + target=server.serve_forever, + kwargs={'log_config': user_log_config}, + ) + server_proc.start() + + +def setup_logging(*, user_log_config=None): + setup_pub_logger() + setup_sub_logger(user_log_config=user_log_config) + + +def create_subscriber_logging_config(*, user_log_config=None): # noqa: C901 + sub_log_config = deepcopy(SUBSCRIBER_LOGGING_CONFIG) + + if not user_log_config: + return sub_log_config + + if 'file' in user_log_config: + filename = user_log_config['file'] + sub_log_config['handlers']['file']['filename'] = filename + + if 'error_file' in user_log_config: + error_filename = user_log_config['error_file'] + sub_log_config['handlers']['errors']['filename'] = error_filename + + if 'level_console' in user_log_config: + level = _normalize_log_level(user_log_config['level_console']) + sub_log_config['handlers']['console']['level'] = level + + if 'level_logfile' in user_log_config: + level = _normalize_log_level(user_log_config['level_logfile']) + sub_log_config['handlers']['file']['level'] = level + + if 'fmt_console' in user_log_config: + fmt = user_log_config['fmt_console'] + sub_log_config['formatters']['console']['format'] = fmt + + if 'fmt_logfile' in user_log_config: + fmt = user_log_config['fmt_logfile'] + sub_log_config['formatters']['file']['format'] = fmt + + if 'datefmt_console' in user_log_config: + fmt = user_log_config['datefmt_console'] + sub_log_config['formatters']['console']['datefmt'] = fmt + + if 'datefmt_logfile' in user_log_config: + fmt = user_log_config['datefmt_logfile'] + sub_log_config['formatters']['file']['datefmt'] = fmt + + log_levels = user_log_config.get('granular_levels', {}) + + for logger_name, level in log_levels.items(): + level = _normalize_log_level(level) + try: + sub_log_config['loggers'][logger_name]['level'] = level + except KeyError: + sub_log_config['loggers'][logger_name] = {'level': level} + + return sub_log_config + + +class LogRecordStreamHandler(StreamRequestHandler): + """Handler for a streaming logging 
request. + + This basically logs the record using whatever logging policy is + configured locally. + """ + + def handle(self): + """ + Handle multiple requests - each expected to be a 4-byte length, + followed by the LogRecord in pickle format. Logs the record + according to whatever policy is configured locally. + """ + while True: + chunk = self.connection.recv(4) + if len(chunk) < 4: + break + slen = struct.unpack('>L', chunk)[0] + chunk = self.connection.recv(slen) + while len(chunk) < slen: + chunk = chunk + self.connection.recv(slen - len(chunk)) + obj = self.unpickle(chunk) + record = logging.makeLogRecord(obj) + self.handle_log_record(record) + + def unpickle(self, data): + try: + return pickle.loads(data) + except (pickle.UnpicklingError, + AttributeError, EOFError, TypeError) as exc: + return { + 'msg': '({}) Log handling error: un-pickling failed!'.format( + exc.__class__.__name__), + 'exc_info': exc.args, + 'level': logging.ERROR, + 'func': self.unpickle.__name__, + } + + def handle_log_record(self, record): + logger = logging.getLogger(record.name) + logger.handle(record) + + +class LogRecordSocketServer(ThreadingTCPServer): + """ + Simple TCP socket-based logging server. + + """ + allow_reuse_address = True + + def __init__(self, + host='localhost', + port=logging.handlers.DEFAULT_TCP_LOGGING_PORT, + handler=LogRecordStreamHandler): + super().__init__((host, port), handler) + + def serve_forever(self, *, poll_interval=0.5, log_config=None): + sub_logging_config = create_subscriber_logging_config( + user_log_config=log_config) + dictConfig(sub_logging_config) + try: + super().serve_forever(poll_interval=poll_interval) + except KeyboardInterrupt: + pass + + +# NOTE: Because the context manager is only available +# from 3.6 and up, we add it for lower versions. +if sys.version_info < (3, 6): + def __enter__(self): + return self + + def __exit__(self, *args): + self.server_close() + + LogRecordSocketServer.__enter__ = __enter__ + LogRecordSocketServer.__exit__ = __exit__ diff --git a/bigchaindb/models.py b/bigchaindb/models.py index ee7efe8f..2f46ba20 100644 --- a/bigchaindb/models.py +++ b/bigchaindb/models.py @@ -1,9 +1,10 @@ from bigchaindb.common.crypto import hash_data, PublicKey, PrivateKey from bigchaindb.common.exceptions import (InvalidHash, InvalidSignature, - OperationError, DoubleSpend, - TransactionDoesNotExist, + DoubleSpend, InputDoesNotExist, TransactionNotInValidBlock, - AssetIdMismatch, AmountError) + AssetIdMismatch, AmountError, + SybilError, + DuplicateTransaction) from bigchaindb.common.transaction import Transaction from bigchaindb.common.utils import gen_timestamp, serialize from bigchaindb.common.schema import validate_transaction_schema @@ -11,7 +12,7 @@ from bigchaindb.common.schema import validate_transaction_schema class Transaction(Transaction): def validate(self, bigchain): - """Validate a transaction. + """Validate transaction spend Args: bigchain (Bigchain): an instantiated bigchaindb.Bigchain object. @@ -22,45 +23,11 @@ class Transaction(Transaction): invalid. 
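A sketch of how user settings flow into the subscriber configuration assembled by `create_subscriber_logging_config` above (the key names are the ones the function handles; the import path assumes the new `bigchaindb.log.setup` module):

```python
from bigchaindb.log.setup import create_subscriber_logging_config

user_config = {
    'level_console': 'debug',
    'granular_levels': {'bigchaindb.core': 'warning'},
}
cfg = create_subscriber_logging_config(user_log_config=user_config)
assert cfg['handlers']['console']['level'] == 'DEBUG'
assert cfg['loggers']['bigchaindb.core'] == {'level': 'WARNING'}
```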
Raises: - OperationError: if the transaction operation is not supported - TransactionDoesNotExist: if the input of the transaction is not - found - TransactionNotInValidBlock: if the input of the transaction is not - in a valid block - TransactionOwnerError: if the new transaction is using an input it - doesn't own - DoubleSpend: if the transaction is a double spend - InvalidHash: if the hash of the transaction is wrong - InvalidSignature: if the signature of the transaction is wrong + ValidationError: If the transaction is invalid """ - if len(self.inputs) == 0: - raise ValueError('Transaction contains no inputs') - input_conditions = [] - inputs_defined = all([input_.fulfills for input_ in self.inputs]) - - # validate amounts - if any(output.amount < 1 for output in self.outputs): - raise AmountError('`amount` needs to be greater than zero') - - if self.operation in (Transaction.CREATE, Transaction.GENESIS): - # validate asset - if self.asset['data'] is not None and not isinstance(self.asset['data'], dict): - raise TypeError(('`asset.data` must be a dict instance or ' - 'None for `CREATE` transactions')) - # validate inputs - if inputs_defined: - raise ValueError('A CREATE operation has no inputs') - elif self.operation == Transaction.TRANSFER: - # validate asset - if not isinstance(self.asset['id'], str): - raise ValueError(('`asset.id` must be a string for ' - '`TRANSFER` transations')) - # check inputs - if not inputs_defined: - raise ValueError('Only `CREATE` transactions can have null ' - 'inputs') + if self.operation == Transaction.TRANSFER: # store the inputs so that we can check if the asset ids match input_txs = [] for input_ in self.inputs: @@ -69,8 +36,8 @@ class Transaction(Transaction): get_transaction(input_txid, include_status=True) if input_tx is None: - raise TransactionDoesNotExist("input `{}` doesn't exist" - .format(input_txid)) + raise InputDoesNotExist("input `{}` doesn't exist" + .format(input_txid)) if status != bigchain.TX_VALID: raise TransactionNotInValidBlock( @@ -85,8 +52,6 @@ class Transaction(Transaction): output = input_tx.outputs[input_.fulfills.output] input_conditions.append(output) input_txs.append(input_tx) - if output.amount < 1: - raise AmountError('`amount` needs to be greater than zero') # Validate that all inputs are distinct links = [i.fulfills.to_uri() for i in self.inputs] @@ -100,11 +65,6 @@ class Transaction(Transaction): ' match the asset id of the' ' transaction')) - # validate the amounts - for output in self.outputs: - if output.amount < 1: - raise AmountError('`amount` needs to be greater than zero') - input_amount = sum([input_condition.amount for input_condition in input_conditions]) output_amount = sum([output_condition.amount for output_condition in self.outputs]) @@ -114,11 +74,6 @@ class Transaction(Transaction): ' in the outputs `{}`') .format(input_amount, output_amount)) - else: - allowed_operations = ', '.join(Transaction.ALLOWED_OPERATIONS) - raise TypeError('`operation`: `{}` must be either {}.' - .format(self.operation, allowed_operations)) - if not self.inputs_valid(input_conditions): raise InvalidSignature('Transaction signature is invalid.') @@ -205,18 +160,8 @@ class Block(object): raised. Raises: - OperationError: If a non-federation node signed the Block. - InvalidSignature: If a Block's signature is invalid or if the - block contains a transaction with an invalid signature. 
- OperationError: if the transaction operation is not supported - TransactionDoesNotExist: if the input of the transaction is not - found - TransactionNotInValidBlock: if the input of the transaction is not - in a valid block - TransactionOwnerError: if the new transaction is using an input it - doesn't own - DoubleSpend: if the transaction is a double spend - InvalidHash: if the hash of the transaction is wrong + ValidationError: If the block or any transaction in the block does + not validate """ self._validate_block(bigchain) @@ -232,18 +177,21 @@ class Block(object): object. Raises: - OperationError: If a non-federation node signed the Block. - InvalidSignature: If a Block's signature is invalid. + ValidationError: If there is a problem with the block """ # Check if the block was created by a federation node - possible_voters = (bigchain.nodes_except_me + [bigchain.me]) - if self.node_pubkey not in possible_voters: - raise OperationError('Only federation nodes can create blocks') + if self.node_pubkey not in bigchain.federation: + raise SybilError('Only federation nodes can create blocks') # Check that the signature is valid if not self.is_signature_valid(): raise InvalidSignature('Invalid block signature') + # Check that the block contains no duplicated transactions + txids = [tx.id for tx in self.transactions] + if len(txids) != len(set(txids)): + raise DuplicateTransaction('Block has duplicate transaction') + def _validate_block_transactions(self, bigchain): """Validate Block transactions. @@ -251,16 +199,7 @@ class Block(object): bigchain (Bigchain): an instantiated bigchaindb.Bigchain object. Raises: - OperationError: if the transaction operation is not supported - TransactionDoesNotExist: if the input of the transaction is not - found - TransactionNotInValidBlock: if the input of the transaction is not - in a valid block - TransactionOwnerError: if the new transaction is using an input it - doesn't own - DoubleSpend: if the transaction is a double spend - InvalidHash: if the hash of the transaction is wrong - InvalidSignature: if the signature of the transaction is wrong + ValidationError: If an invalid transaction is found """ for tx in self.transactions: # If a transaction is not valid, `validate_transactions` will @@ -341,10 +280,10 @@ class Block(object): dict: The Block as a dict. Raises: - OperationError: If the Block doesn't contain any transactions. + ValueError: If the Block doesn't contain any transactions. """ if len(self.transactions) == 0: - raise OperationError('Empty block creation is not allowed') + raise ValueError('Empty block creation is not allowed') block = { 'timestamp': self.timestamp, diff --git a/bigchaindb/pipelines/block.py b/bigchaindb/pipelines/block.py index 1f2e9017..0fe327bb 100644 --- a/bigchaindb/pipelines/block.py +++ b/bigchaindb/pipelines/block.py @@ -13,8 +13,7 @@ import bigchaindb from bigchaindb import backend from bigchaindb.backend.changefeed import ChangeFeed from bigchaindb.models import Transaction -from bigchaindb.common.exceptions import (SchemaValidationError, InvalidHash, - InvalidSignature, AmountError) +from bigchaindb.common.exceptions import ValidationError from bigchaindb import Bigchain @@ -31,7 +30,7 @@ class BlockPipeline: def __init__(self): """Initialize the BlockPipeline creator""" self.bigchain = Bigchain() - self.txs = [] + self.txs = tx_collector() def filter_tx(self, tx): """Filter a transaction. 
@@ -63,8 +62,7 @@ class BlockPipeline: """ try: tx = Transaction.from_dict(tx) - except (SchemaValidationError, InvalidHash, InvalidSignature, - AmountError): + except ValidationError: return None # If transaction is in any VALID or UNDECIDED block we @@ -74,12 +72,14 @@ class BlockPipeline: return None # If transaction is not valid it should not be included - if not self.bigchain.is_valid_transaction(tx): + try: + tx.validate(self.bigchain) + return tx + except ValidationError as e: + logger.warning('Invalid tx: %s', e) self.bigchain.delete_transaction(tx.id) return None - return tx - def create(self, tx, timeout=False): """Create a block. @@ -98,11 +98,10 @@ class BlockPipeline: :class:`~bigchaindb.models.Block`: The block, if a block is ready, or ``None``. """ - if tx: - self.txs.append(tx) - if len(self.txs) == 1000 or (timeout and self.txs): - block = self.bigchain.create_block(self.txs) - self.txs = [] + txs = self.txs.send(tx) + if len(txs) == 1000 or (timeout and txs): + block = self.bigchain.create_block(txs) + self.txs = tx_collector() return block def write(self, block): @@ -134,6 +133,27 @@ class BlockPipeline: return block +def tx_collector(): + """ A helper to deduplicate transactions """ + + def snowflake(): + txids = set() + txs = [] + while True: + tx = yield txs + if tx: + if tx.id not in txids: + txids.add(tx.id) + txs.append(tx) + else: + logger.info('Refusing to add tx to block twice: ' + + tx.id) + + s = snowflake() + s.send(None) + return s + + def create_pipeline(): """Create and return the pipeline of operations to be distributed on different processes.""" diff --git a/bigchaindb/pipelines/election.py b/bigchaindb/pipelines/election.py index 850613a3..fc7cb077 100644 --- a/bigchaindb/pipelines/election.py +++ b/bigchaindb/pipelines/election.py @@ -13,16 +13,21 @@ from bigchaindb import backend from bigchaindb.backend.changefeed import ChangeFeed from bigchaindb.models import Block from bigchaindb import Bigchain +from bigchaindb.events import EventHandler, Event, EventTypes logger = logging.getLogger(__name__) +logger_results = logging.getLogger('pipeline.election.results') class Election: """Election class.""" - def __init__(self): + def __init__(self, events_queue=None): self.bigchain = Bigchain() + self.event_handler = None + if events_queue: + self.event_handler = EventHandler(events_queue) def check_for_quorum(self, next_vote): """ @@ -32,14 +37,30 @@ class Election: next_vote: The next vote. """ - next_block = self.bigchain.get_block( - next_vote['vote']['voting_for_block']) + try: + block_id = next_vote['vote']['voting_for_block'] + node = next_vote['node_pubkey'] + except KeyError: + return - block_status = self.bigchain.block_election_status(next_block['id'], - next_block['block']['voters']) - if block_status == self.bigchain.BLOCK_INVALID: + next_block = self.bigchain.get_block(block_id) + + result = self.bigchain.block_election(next_block) + self.handle_block_events(result, block_id) + if result['status'] == self.bigchain.BLOCK_INVALID: return Block.from_dict(next_block) + # Log the result + if result['status'] != self.bigchain.BLOCK_UNDECIDED: + msg = 'node:%s block:%s status:%s' % \ + (node, block_id, result['status']) + # Extra data can be accessed via the log formatter. + # See logging.dictConfig. 
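Backtracking to `tx_collector` above: it replaces the plain `self.txs` list so duplicates are dropped as transactions accumulate. An illustrative run (`Tx` is a stand-in with only an `id` attribute):

```python
from collections import namedtuple

Tx = namedtuple('Tx', 'id')

collector = tx_collector()
collector.send(Tx('a'))
collector.send(Tx('a'))             # duplicate: logged and ignored
txs = collector.send(Tx('b'))
assert [tx.id for tx in txs] == ['a', 'b']
assert collector.send(None) is txs  # a timeout tick adds nothing
```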
+ logger_results.debug(msg, extra={ + 'current_vote': next_vote, + 'election_result': result, + }) + def requeue_transactions(self, invalid_block): """ Liquidates transactions from invalid blocks so they can be processed again @@ -51,9 +72,21 @@ class Election: self.bigchain.write_transaction(tx) return invalid_block + def handle_block_events(self, result, block_id): + if self.event_handler: + if result['status'] == self.bigchain.BLOCK_UNDECIDED: + return + elif result['status'] == self.bigchain.BLOCK_INVALID: + event_type = EventTypes.BLOCK_INVALID + elif result['status'] == self.bigchain.BLOCK_VALID: + event_type = EventTypes.BLOCK_VALID -def create_pipeline(): - election = Election() + event = Event(event_type, self.bigchain.get_block(block_id)) + self.event_handler.put_event(event) + + +def create_pipeline(events_queue=None): + election = Election(events_queue=events_queue) election_pipeline = Pipeline([ Node(election.check_for_quorum), @@ -68,8 +101,8 @@ def get_changefeed(): return backend.get_changefeed(connection, 'votes', ChangeFeed.INSERT) -def start(): - pipeline = create_pipeline() +def start(events_queue=None): + pipeline = create_pipeline(events_queue=events_queue) pipeline.setup(indata=get_changefeed()) pipeline.start() return pipeline diff --git a/bigchaindb/pipelines/vote.py b/bigchaindb/pipelines/vote.py index a857ba78..9664c520 100644 --- a/bigchaindb/pipelines/vote.py +++ b/bigchaindb/pipelines/vote.py @@ -13,9 +13,7 @@ from multipipes import Pipeline, Node import bigchaindb from bigchaindb import Bigchain from bigchaindb import backend -from bigchaindb import config_utils from bigchaindb.backend.changefeed import ChangeFeed -from bigchaindb.consensus import BaseConsensusRules from bigchaindb.models import Transaction, Block from bigchaindb.common import exceptions @@ -37,13 +35,6 @@ class Vote: # we need to create a temporary instance of BigchainDB that we use # only to query RethinkDB - consensusPlugin = bigchaindb.config.get('consensus_plugin') - - if consensusPlugin: - self.consensus = config_utils.load_consensus_plugin(consensusPlugin) - else: - self.consensus = BaseConsensusRules - # This is the Bigchain instance that will be "shared" (aka: copied) # by all the subprocesses @@ -57,8 +48,7 @@ class Vote: [([self.bigchain.me], 1)]) def validate_block(self, block): - if not self.bigchain.has_previous_vote(block['id'], - block['block']['voters']): + if not self.bigchain.has_previous_vote(block['id']): try: block = Block.from_dict(block) except (exceptions.InvalidHash): @@ -70,7 +60,7 @@ class Vote: return block['id'], [self.invalid_dummy_tx] try: block._validate_block(self.bigchain) - except (exceptions.OperationError, exceptions.InvalidSignature): + except exceptions.ValidationError: # XXX: if a block is invalid we should skip the `validate_tx` # step, but since we are in a pipeline we cannot just jump to # another function. 
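`handle_block_events` above only emits events for decided blocks; the queue plumbing from `bigchaindb.events` can be exercised in isolation (a sketch, using the module introduced earlier in this changeset):

```python
from bigchaindb.events import Event, EventHandler, EventTypes, setup_events_queue

queue = setup_events_queue()
handler = EventHandler(queue)
handler.put_event(Event(EventTypes.BLOCK_VALID, {'id': 'some-block-id'}))
event = handler.get_event()
assert event.type is EventTypes.BLOCK_VALID
assert event.data == {'id': 'some-block-id'}
```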
Hackish solution: generate an invalid @@ -114,7 +104,13 @@ if not new: return False, block_id, num_tx - valid = bool(self.bigchain.is_valid_transaction(tx)) + try: + tx.validate(self.bigchain) + valid = True + except exceptions.ValidationError as e: + logger.warning('Invalid tx: %s', e) + valid = False + return valid, block_id, num_tx def vote(self, tx_validity, block_id, num_tx): diff --git a/bigchaindb/processes.py b/bigchaindb/processes.py index 01d7a55a..205cdd3c 100644 --- a/bigchaindb/processes.py +++ b/bigchaindb/processes.py @@ -3,7 +3,8 @@ import multiprocessing as mp import bigchaindb from bigchaindb.pipelines import vote, block, election, stale -from bigchaindb.web import server +from bigchaindb.events import setup_events_queue +from bigchaindb.web import server, websocket_server logger = logging.getLogger(__name__) @@ -25,6 +26,13 @@ BANNER = """ def start(): logger.info('Initializing BigchainDB...') + # Create the events queue + # The events queue needs to be initialized once and shared between + # processes. This seems the best way to do it. + # At this point only the election process and the event consumer require + # this queue. + events_queue = setup_events_queue() + # start the processes logger.info('Starting block') block.start() @@ -36,12 +44,18 @@ stale.start() logger.info('Starting election') - election.start() + election.start(events_queue=events_queue) # start the web api app_server = server.create_server(bigchaindb.config['server']) p_webapi = mp.Process(name='webapi', target=app_server.run) p_webapi.start() + logger.info('WebSocket server started') + p_websocket_server = mp.Process(name='ws', + target=websocket_server.start, + args=(events_queue,)) + p_websocket_server.start() + # start message logger.info(BANNER.format(bigchaindb.config['server']['bind'])) diff --git a/bigchaindb/utils.py b/bigchaindb/utils.py index 1860dd3e..f87916b7 100644 --- a/bigchaindb/utils.py +++ b/bigchaindb/utils.py @@ -3,9 +3,6 @@ import threading import queue import multiprocessing as mp -from bigchaindb.common import crypto -from bigchaindb.common.utils import serialize class ProcessGroup(object): @@ -116,28 +113,17 @@ return False -def verify_vote_signature(voters, signed_vote): - """Verify the signature of a vote - - A valid vote should have been signed by a voter's private key. - - Args: - voters (list): voters of the block that is under election - signed_vote (dict): a vote with the `signature` included. - - Returns: - bool: True if the signature is correct, False otherwise. - """ - - signature = signed_vote['signature'] - pk_base58 = signed_vote['node_pubkey'] - - # immediately return False if the voter is not in the block voter list - if pk_base58 not in voters: - return False - - public_key = crypto.PublicKey(pk_base58) - return public_key.verify(serialize(signed_vote['vote']).encode(), signature) +def output_has_owner(output, owner): + # TODO + # Check whether it is really necessary to treat the single key case + # differently from the multiple keys case, and why not just use the same + # function for both cases. + if len(output['public_keys']) > 1: + return condition_details_has_owner( + output['condition']['details'], owner) + elif len(output['public_keys']) == 1: + return output['condition']['details']['public_key'] == owner + # TODO raise proper exception, e.g. invalid tx payload?
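For the single-key fast path of `output_has_owner`, the shapes involved look like this (illustrative dicts, with the helper above in scope; the multi-key branch defers to `condition_details_has_owner`):

```python
single_key_output = {
    'public_keys': ['pubA'],
    'condition': {'details': {'public_key': 'pubA'}},
}
assert output_has_owner(single_key_output, 'pubA') is True
assert output_has_owner(single_key_output, 'pubB') is False
```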
 def is_genesis_block(block):
diff --git a/bigchaindb/version.py b/bigchaindb/version.py
index c59a3cbe..6bf027a0 100644
--- a/bigchaindb/version.py
+++ b/bigchaindb/version.py
@@ -1,2 +1,2 @@
-__version__ = '0.10.0.dev'
-__short_version__ = '0.10.dev'
+__version__ = '0.11.0.dev'
+__short_version__ = '0.11.dev'
diff --git a/bigchaindb/voting.py b/bigchaindb/voting.py
new file mode 100644
index 00000000..cc20944d
--- /dev/null
+++ b/bigchaindb/voting.py
@@ -0,0 +1,151 @@
+import collections
+
+from bigchaindb.common.schema import SchemaValidationError, validate_vote_schema
+from bigchaindb.exceptions import CriticalDuplicateVote
+from bigchaindb.common.utils import serialize
+from bigchaindb.common.crypto import PublicKey
+
+
+VALID = 'valid'
+INVALID = 'invalid'
+UNDECIDED = 'undecided'
+
+
+class Voting:
+    """
+    Everything to do with verifying and counting votes for block election.
+
+    All functions in this class should be referentially transparent, that is,
+    they always give the same output for a given input. This makes it easier
+    to test. This also means no logging!
+
+    Assumptions regarding data:
+        * Vote is a dictionary, but no assumptions are made on its properties.
+        * Everything else is assumed to be structurally correct; otherwise,
+          errors may be thrown.
+    """
+
+    @classmethod
+    def block_election(cls, block, votes, keyring):
+        """
+        Calculate the election status of a block.
+        """
+        eligible_voters = set(block['block']['voters']) & set(keyring)
+        n_voters = len(eligible_voters)
+        eligible_votes, ineligible_votes = \
+            cls.partition_eligible_votes(votes, eligible_voters)
+        by_voter = cls.dedupe_by_voter(eligible_votes)
+        results = cls.count_votes(by_voter)
+        results['block_id'] = block['id']
+        results['status'] = cls.decide_votes(n_voters, **results['counts'])
+        results['ineligible'] = ineligible_votes
+        return results
+
+    @classmethod
+    def partition_eligible_votes(cls, votes, eligible_voters):
+        """
+        Filter out votes from unknown nodes or nodes that are not listed on
+        the block. This is the primary Sybil protection.
+        """
+        eligible, ineligible = ([], [])
+
+        for vote in votes:
+            voter_eligible = vote.get('node_pubkey') in eligible_voters
+            if voter_eligible:
+                try:
+                    if cls.verify_vote_signature(vote):
+                        eligible.append(vote)
+                        continue
+                except ValueError:
+                    pass
+            ineligible.append(vote)
+        return eligible, ineligible
+
+    @classmethod
+    def dedupe_by_voter(cls, eligible_votes):
+        """
+        Throw a critical error if there is a duplicate vote.
+        """
+        by_voter = {}
+        for vote in eligible_votes:
+            pubkey = vote['node_pubkey']
+            if pubkey in by_voter:
+                raise CriticalDuplicateVote(pubkey)
+            by_voter[pubkey] = vote
+        return by_voter
+
+    @classmethod
+    def count_votes(cls, by_voter):
+        """
+        Given a list of eligible votes (votes from known nodes that are listed
+        as voters), produce the number that say valid and the number that say
+        invalid. Votes must agree on the previous block, otherwise they become
+        invalid.
+        """
+        prev_blocks = collections.Counter()
+        malformed = []
+
+        for vote in by_voter.values():
+            if not cls.verify_vote_schema(vote):
+                malformed.append(vote)
+                continue
+
+            if vote['vote']['is_block_valid'] is True:
+                prev_blocks[vote['vote']['previous_block']] += 1
+
+        n_valid = 0
+        prev_block = None
+        # Valid votes must agree on previous block
+        if prev_blocks:
+            prev_block, n_valid = prev_blocks.most_common()[0]
+            del prev_blocks[prev_block]
+
+        return {
+            'counts': {
+                'n_valid': n_valid,
+                'n_invalid': len(by_voter) - n_valid,
+            },
+            'malformed': malformed,
+            'previous_block': prev_block,
+            'other_previous_block': dict(prev_blocks),
+        }
+
+    @classmethod
+    def decide_votes(cls, n_voters, n_valid, n_invalid):
+        """
+        Decide on votes.
+
+        To return VALID there must be a clear majority that says VALID
+        and also agrees on the previous block.
+
+        A tie on an even number of votes counts as INVALID.
+        """
+        if n_invalid * 2 >= n_voters:
+            return INVALID
+        if n_valid * 2 > n_voters:
+            return VALID
+        return UNDECIDED
+
+    @classmethod
+    def verify_vote_signature(cls, vote):
+        """
+        Verify the signature of a vote.
+        """
+        signature = vote.get('signature')
+        pk_base58 = vote.get('node_pubkey')
+
+        if not (isinstance(signature, str) and isinstance(pk_base58, str)):
+            raise ValueError('Malformed vote: %s' % vote)
+
+        public_key = PublicKey(pk_base58)
+        body = serialize(vote['vote']).encode()
+        return public_key.verify(body, signature)
+
+    @classmethod
+    def verify_vote_schema(cls, vote):
+        # I'm not sure this is the correct approach. Maybe we should allow
+        # duck typing w/r/t votes.
+        try:
+            validate_vote_schema(vote)
+            return True
+        except SchemaValidationError:
+            return False
diff --git a/bigchaindb/web/server.py b/bigchaindb/web/server.py
index bcd44d11..46495368 100644
--- a/bigchaindb/web/server.py
+++ b/bigchaindb/web/server.py
@@ -22,7 +22,7 @@ class StandaloneApplication(gunicorn.app.base.BaseApplication):
       - http://docs.gunicorn.org/en/latest/custom.html
     """
 
-    def __init__(self, app, options=None):
+    def __init__(self, app, *, options=None):
         '''Initialize a new standalone application.
Args: @@ -32,7 +32,7 @@ class StandaloneApplication(gunicorn.app.base.BaseApplication): ''' self.options = options or {} self.application = app - super(StandaloneApplication, self).__init__() + super().__init__() def load_config(self): config = dict((key, value) for key, value in self.options.items() @@ -88,7 +88,8 @@ def create_server(settings): if not settings.get('threads'): settings['threads'] = (multiprocessing.cpu_count() * 2) + 1 + settings['logger_class'] = 'bigchaindb.log.loggers.HttpServerLogger' app = create_app(debug=settings.get('debug', False), threads=settings['threads']) - standalone = StandaloneApplication(app, settings) + standalone = StandaloneApplication(app, options=settings) return standalone diff --git a/bigchaindb/web/views/base.py b/bigchaindb/web/views/base.py index 171a3bb6..0c226d7d 100644 --- a/bigchaindb/web/views/base.py +++ b/bigchaindb/web/views/base.py @@ -5,6 +5,9 @@ import logging from flask import jsonify, request +from bigchaindb import config + + logger = logging.getLogger(__name__) @@ -21,3 +24,8 @@ def make_error(status_code, message=None): def base_url(): return '%s://%s/' % (request.environ['wsgi.url_scheme'], request.environ['HTTP_HOST']) + + +def base_ws_uri(): + """Base websocket uri.""" + return 'ws://{host}:{port}'.format(**config['wsserver']) diff --git a/bigchaindb/web/views/blocks.py b/bigchaindb/web/views/blocks.py index 7e840fe5..1ea1a28f 100644 --- a/bigchaindb/web/views/blocks.py +++ b/bigchaindb/web/views/blocks.py @@ -1,8 +1,6 @@ """This module provides the blueprint for the blocks API endpoints. -For more information please refer to the documentation on ReadTheDocs: - - https://docs.bigchaindb.com/projects/server/en/latest/drivers-clients/ - http-client-server-api.html +For more information please refer to the documentation: http://bigchaindb.com/http-api """ from flask import current_app from flask_restful import Resource, reqparse diff --git a/bigchaindb/web/views/info.py b/bigchaindb/web/views/info.py index 04a15749..6b01b007 100644 --- a/bigchaindb/web/views/info.py +++ b/bigchaindb/web/views/info.py @@ -4,8 +4,9 @@ import flask from flask_restful import Resource import bigchaindb -from bigchaindb.web.views.base import base_url +from bigchaindb.web.views.base import base_url, base_ws_uri from bigchaindb import version +from bigchaindb.web.websocket_server import EVENTS_ENDPOINT class RootIndex(Resource): @@ -30,16 +31,18 @@ class RootIndex(Resource): class ApiV1Index(Resource): def get(self): api_root = base_url() + 'api/v1/' + websocket_root = base_ws_uri() + EVENTS_ENDPOINT docs_url = [ 'https://docs.bigchaindb.com/projects/server/en/v', version.__version__, - '/drivers-clients/http-client-server-api.html', + '/http-client-server-api.html', ] - return { + return flask.jsonify({ '_links': { 'docs': ''.join(docs_url), 'self': api_root, 'statuses': api_root + 'statuses/', 'transactions': api_root + 'transactions/', + 'streams_v1': websocket_root, }, - } + }) diff --git a/bigchaindb/web/views/statuses.py b/bigchaindb/web/views/statuses.py index 39f880b1..a8186146 100644 --- a/bigchaindb/web/views/statuses.py +++ b/bigchaindb/web/views/statuses.py @@ -1,8 +1,6 @@ """This module provides the blueprint for the statuses API endpoints. 
-For more information please refer to the documentation on ReadTheDocs:
-
- - https://docs.bigchaindb.com/projects/server/en/latest/drivers-clients/
-   http-client-server-api.html
+For more information please refer to the documentation: http://bigchaindb.com/http-api
 """
 from flask import current_app
 from flask_restful import Resource, reqparse
diff --git a/bigchaindb/web/views/transactions.py b/bigchaindb/web/views/transactions.py
index 7acaa279..9f024f54 100644
--- a/bigchaindb/web/views/transactions.py
+++ b/bigchaindb/web/views/transactions.py
@@ -1,28 +1,13 @@
 """This module provides the blueprint for some basic API endpoints.
 
-For more information please refer to the documentation on ReadTheDocs:
-
- - https://docs.bigchaindb.com/projects/server/en/latest/drivers-clients/
-   http-client-server-api.html
+For more information please refer to the documentation: http://bigchaindb.com/http-api
 """
 import logging
 
 from flask import current_app, request
 from flask_restful import Resource, reqparse
-
-from bigchaindb.common.exceptions import (
-    AmountError,
-    DoubleSpend,
-    InvalidHash,
-    InvalidSignature,
-    SchemaValidationError,
-    OperationError,
-    TransactionDoesNotExist,
-    TransactionOwnerError,
-    TransactionNotInValidBlock,
-    ValidationError,
-)
-
+from bigchaindb.common.exceptions import SchemaValidationError, ValidationError
 from bigchaindb.models import Transaction
 from bigchaindb.web.views.base import make_error
 from bigchaindb.web.views import parameters
@@ -84,7 +69,7 @@ class TransactionListApi(Resource):
                 message='Invalid transaction schema: {}'.format(
                     e.__cause__.message)
             )
-        except (ValidationError, InvalidSignature) as e:
+        except ValidationError as e:
             return make_error(
                 400,
                 'Invalid transaction ({}): {}'.format(type(e).__name__, e)
@@ -93,15 +78,7 @@ class TransactionListApi(Resource):
         with pool() as bigchain:
             try:
                 bigchain.validate_transaction(tx_obj)
-            except (ValueError,
-                    OperationError,
-                    TransactionDoesNotExist,
-                    TransactionOwnerError,
-                    DoubleSpend,
-                    InvalidHash,
-                    InvalidSignature,
-                    TransactionNotInValidBlock,
-                    AmountError) as e:
+            except ValidationError as e:
                 return make_error(
                     400,
                     'Invalid transaction ({}): {}'.format(type(e).__name__, e)
diff --git a/bigchaindb/web/views/votes.py b/bigchaindb/web/views/votes.py
index 68265b40..45a86812 100644
--- a/bigchaindb/web/views/votes.py
+++ b/bigchaindb/web/views/votes.py
@@ -1,8 +1,6 @@
 """This module provides the blueprint for the votes API endpoints.
 
-For more information please refer to the documentation on ReadTheDocs:
-
- - https://docs.bigchaindb.com/projects/server/en/latest/drivers-clients/
-   http-client-server-api.html
+For more information please refer to the documentation: http://bigchaindb.com/http-api
 """
 from flask import current_app
 from flask_restful import Resource, reqparse
diff --git a/bigchaindb/web/websocket_server.py b/bigchaindb/web/websocket_server.py
new file mode 100644
index 00000000..0aa51ecb
--- /dev/null
+++ b/bigchaindb/web/websocket_server.py
@@ -0,0 +1,154 @@
+"""WebSocket server for the BigchainDB Event Stream API."""
+
+# NOTE
+#
+# This module contains some functions and utilities that might belong to other
+# modules. For now, I prefer to keep everything in this module. Why? Because
+# those functions are needed only here.
+#
+# When we extend this part of the project and find that we need those
+# functionalities elsewhere, we can start creating new modules and organize
+# things in a better way.
+
+
+import json
+import asyncio
+import logging
+import threading
+from uuid import uuid4
+
+import aiohttp
+from aiohttp import web
+
+from bigchaindb import config
+from bigchaindb.events import EventTypes
+
+
+logger = logging.getLogger(__name__)
+POISON_PILL = 'POISON_PILL'
+EVENTS_ENDPOINT = '/api/v1/streams/valid_tx'
+
+
+def _multiprocessing_to_asyncio(in_queue, out_queue, loop):
+    """Bridge between a synchronous multiprocessing queue
+    and an asynchronous asyncio queue.
+
+    Args:
+        in_queue (multiprocessing.Queue): input queue
+        out_queue (asyncio.Queue): output queue
+        loop (asyncio.AbstractEventLoop): the event loop that owns ``out_queue``
+    """
+
+    while True:
+        value = in_queue.get()
+        loop.call_soon_threadsafe(out_queue.put_nowait, value)
+
+
+class Dispatcher:
+    """Dispatch events to websockets.
+
+    This class implements a simple publish/subscribe pattern.
+    """
+
+    def __init__(self, event_source):
+        """Create a new instance.
+
+        Args:
+            event_source: a source of events. Elements in the queue
+                should be strings.
+        """
+
+        self.event_source = event_source
+        self.subscribers = {}
+
+    def subscribe(self, uuid, websocket):
+        """Add a websocket to the list of subscribers.
+
+        Args:
+            uuid (str): a unique identifier for the websocket.
+            websocket: the websocket to publish information to.
+        """
+
+        self.subscribers[uuid] = websocket
+
+    @asyncio.coroutine
+    def publish(self):
+        """Publish new events to the subscribers."""
+
+        while True:
+            event = yield from self.event_source.get()
+            str_buffer = []
+
+            if event == POISON_PILL:
+                return
+
+            if isinstance(event, str):
+                str_buffer.append(event)
+
+            elif event.type == EventTypes.BLOCK_VALID:
+                block = event.data
+
+                for tx in block['block']['transactions']:
+                    asset_id = tx['id'] if tx['operation'] == 'CREATE' else tx['asset']['id']
+                    data = {'block_id': block['id'],
+                            'asset_id': asset_id,
+                            'tx_id': tx['id']}
+                    str_buffer.append(json.dumps(data))
+
+            for _, websocket in self.subscribers.items():
+                for str_item in str_buffer:
+                    websocket.send_str(str_item)
+
+
+@asyncio.coroutine
+def websocket_handler(request):
+    """Handle a new socket connection."""
+
+    logger.debug('New websocket connection.')
+    websocket = web.WebSocketResponse()
+    yield from websocket.prepare(request)
+    uuid = uuid4()
+    request.app['dispatcher'].subscribe(uuid, websocket)
+
+    while True:
+        # Consume input buffer
+        msg = yield from websocket.receive()
+        if msg.type == aiohttp.WSMsgType.ERROR:
+            logger.debug('Websocket exception: %s', websocket.exception())
+            return
+
+
+def init_app(event_source, *, loop=None):
+    """Init the application server.
+
+    Return:
+        An aiohttp application.
+ """ + + dispatcher = Dispatcher(event_source) + + # Schedule the dispatcher + loop.create_task(dispatcher.publish()) + + app = web.Application(loop=loop) + app['dispatcher'] = dispatcher + app.router.add_get(EVENTS_ENDPOINT, websocket_handler) + return app + + +def start(sync_event_source, loop=None): + """Create and start the WebSocket server.""" + + if not loop: + loop = asyncio.get_event_loop() + + event_source = asyncio.Queue(loop=loop) + + bridge = threading.Thread(target=_multiprocessing_to_asyncio, + args=(sync_event_source, event_source, loop), + daemon=True) + bridge.start() + + app = init_app(event_source, loop=loop) + aiohttp.web.run_app(app, + host=config['wsserver']['host'], + port=config['wsserver']['port']) diff --git a/codecov.yml b/codecov.yml index b6f22af9..0ab4582d 100644 --- a/codecov.yml +++ b/codecov.yml @@ -29,9 +29,8 @@ coverage: - "docs/*" - "tests/*" - "bigchaindb/version.py" - - "benchmarking-tests/*" - - "speed-tests/*" - "ntools/*" + - "k8s/*" comment: # @stevepeak (from codecov.io) suggested we change 'suggestions' to 'uncovered' diff --git a/deploy-cluster-aws/awsdeploy.sh b/deploy-cluster-aws/awsdeploy.sh index 00d1f431..b733ef2d 100755 --- a/deploy-cluster-aws/awsdeploy.sh +++ b/deploy-cluster-aws/awsdeploy.sh @@ -39,7 +39,6 @@ fi echo "NUM_NODES = "$NUM_NODES echo "BRANCH = "$BRANCH -echo "WHAT_TO_DEPLOY = "$WHAT_TO_DEPLOY echo "SSH_KEY_NAME" = $SSH_KEY_NAME echo "USE_KEYPAIRS_FILE = "$USE_KEYPAIRS_FILE echo "IMAGE_ID = "$IMAGE_ID @@ -85,7 +84,7 @@ if [[ $CONFILES_COUNT != $NUM_NODES ]]; then fi # Auto-generate the tag to apply to all nodes in the cluster -TAG="BDB-"$WHAT_TO_DEPLOY"-"`date +%m-%d@%H:%M` +TAG="BDB-Server-"`date +%m-%d@%H:%M` echo "TAG = "$TAG # Change the file permissions on the SSH private key file @@ -121,25 +120,24 @@ fab install_base_software fab get_pip3 fab upgrade_setuptools -if [ "$WHAT_TO_DEPLOY" == "servers" ]; then - # (Re)create the RethinkDB configuration file conf/rethinkdb.conf - if [ "$ENABLE_WEB_ADMIN" == "True" ]; then - if [ "$BIND_HTTP_TO_LOCALHOST" == "True" ]; then - python create_rethinkdb_conf.py --enable-web-admin --bind-http-to-localhost - else - python create_rethinkdb_conf.py --enable-web-admin - fi +# (Re)create the RethinkDB configuration file conf/rethinkdb.conf +if [ "$ENABLE_WEB_ADMIN" == "True" ]; then + if [ "$BIND_HTTP_TO_LOCALHOST" == "True" ]; then + python create_rethinkdb_conf.py --enable-web-admin --bind-http-to-localhost else - python create_rethinkdb_conf.py + python create_rethinkdb_conf.py --enable-web-admin fi - # Rollout RethinkDB and start it - fab prep_rethinkdb_storage:$USING_EBS - fab install_rethinkdb - fab configure_rethinkdb - fab delete_rethinkdb_data - fab start_rethinkdb +else + python create_rethinkdb_conf.py fi +# Rollout RethinkDB and start it +fab prep_rethinkdb_storage:$USING_EBS +fab install_rethinkdb +fab configure_rethinkdb +fab delete_rethinkdb_data +fab start_rethinkdb + # Rollout BigchainDB (but don't start it yet) if [ "$BRANCH" == "pypi" ]; then fab install_bigchaindb_from_pypi @@ -156,48 +154,40 @@ fi # Configure BigchainDB on all nodes -if [ "$WHAT_TO_DEPLOY" == "servers" ]; then - # The idea is to send a bunch of locally-created configuration - # files out to each of the instances / nodes. +# The idea is to send a bunch of locally-created configuration +# files out to each of the instances / nodes. - # Assume a set of $NUM_NODES BigchaindB config files - # already exists in the confiles directory. 
-    # One can create a set using a command like
-    # ./make_confiles.sh confiles $NUM_NODES
-    # (We can't do that here now because this virtual environment
-    # is a Python 2 environment that may not even have
-    # bigchaindb installed, so bigchaindb configure can't be called)
+# Assume a set of $NUM_NODES BigchainDB config files
+# already exists in the confiles directory.
+# One can create a set using a command like
+# ./make_confiles.sh confiles $NUM_NODES
+# (We can't do that here now because this virtual environment
+# is a Python 2 environment that may not even have
+# bigchaindb installed, so bigchaindb configure can't be called)
 
-    # Transform the config files in the confiles directory
-    # to have proper keyrings etc.
-    if [ "$USE_KEYPAIRS_FILE" == "True" ]; then
-        python clusterize_confiles.py -k confiles $NUM_NODES
-    else
-        python clusterize_confiles.py confiles $NUM_NODES
-    fi
-
-    # Send one of the config files to each instance
-    for (( HOST=0 ; HOST<$NUM_NODES ; HOST++ )); do
-        CONFILE="bcdb_conf"$HOST
-        echo "Sending "$CONFILE
-        fab set_host:$HOST send_confile:$CONFILE
-    done
-
-    # Initialize BigchainDB (i.e. Create the RethinkDB database,
-    # the tables, the indexes, and genesis glock). Note that
-    # this will only be sent to one of the nodes, see the
-    # definition of init_bigchaindb() in fabfile.py to see why.
-    fab init_bigchaindb
-    fab set_shards:$NUM_NODES
-    echo "To set the replication factor to 3, do: fab set_replicas:3"
-    echo "To start BigchainDB on all the nodes, do: fab start_bigchaindb"
+# Transform the config files in the confiles directory
+# to have proper keyrings etc.
+if [ "$USE_KEYPAIRS_FILE" == "True" ]; then
+    python clusterize_confiles.py -k confiles $NUM_NODES
 else
-    # Deploying clients
-    fab send_client_confile:client_confile
-
-    # Start sending load from the clients to the servers
-    fab start_bigchaindb_load
+    python clusterize_confiles.py confiles $NUM_NODES
 fi
 
+# Send one of the config files to each instance
+for (( HOST=0 ; HOST<$NUM_NODES ; HOST++ )); do
+    CONFILE="bcdb_conf"$HOST
+    echo "Sending "$CONFILE
+    fab set_host:$HOST send_confile:$CONFILE
+done
+
+# Initialize BigchainDB (i.e. Create the RethinkDB database,
+# the tables, the indexes, and genesis block). Note that
+# this will only be sent to one of the nodes, see the
+# definition of init_bigchaindb() in fabfile.py to see why.
+fab init_bigchaindb
+fab set_shards:$NUM_NODES
+echo "To set the replication factor to 3, do: fab set_replicas:3"
+echo "To start BigchainDB on all the nodes, do: fab start_bigchaindb"
+
 # cleanup
 rm add2known_hosts.sh
diff --git a/deploy-cluster-aws/example_deploy_conf.py b/deploy-cluster-aws/example_deploy_conf.py
index 623151ef..6aab8f30 100644
--- a/deploy-cluster-aws/example_deploy_conf.py
+++ b/deploy-cluster-aws/example_deploy_conf.py
@@ -23,10 +23,6 @@ NUM_NODES=3
 # It's where to get the BigchainDB code to be deployed on the nodes
 BRANCH="master"
 
-# WHAT_TO_DEPLOY is either "servers" or "clients"
-# What do you want to deploy?
-WHAT_TO_DEPLOY="servers"
-
 # SSH_KEY_NAME is the name of the SSH private key file
 # in $HOME/.ssh/
 # It is used for SSH communications with AWS instances.
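As an aside on the new `bigchaindb/voting.py` module introduced above: the majority rule in `Voting.decide_votes` is small enough to sanity-check in isolation. A minimal standalone sketch, reproducing the logic from the diff (the vote counts below are illustrative only):

```python
# Reproduction of the decide_votes majority rule from bigchaindb/voting.py,
# for illustration. The n_invalid check runs first, so a tie on an even
# number of voters resolves to INVALID.
VALID, INVALID, UNDECIDED = 'valid', 'invalid', 'undecided'


def decide_votes(n_voters, n_valid, n_invalid):
    if n_invalid * 2 >= n_voters:
        return INVALID
    if n_valid * 2 > n_voters:
        return VALID
    return UNDECIDED


assert decide_votes(n_voters=4, n_valid=2, n_invalid=2) == INVALID    # tie
assert decide_votes(n_voters=5, n_valid=3, n_invalid=2) == VALID      # clear majority
assert decide_votes(n_voters=5, n_valid=2, n_invalid=1) == UNDECIDED  # still open
```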
diff --git a/deploy-cluster-aws/fabfile.py b/deploy-cluster-aws/fabfile.py index 9ef24edd..737109f9 100644 --- a/deploy-cluster-aws/fabfile.py +++ b/deploy-cluster-aws/fabfile.py @@ -237,15 +237,6 @@ def send_confile(confile): run('bigchaindb show-config') -@task -@parallel -def send_client_confile(confile): - put(confile, 'tempfile') - run('mv tempfile ~/.bigchaindb') - print('For this node, bigchaindb show-config says:') - run('bigchaindb show-config') - - # Initialize BigchainDB # i.e. create the database, the tables, # the indexes, and the genesis block. @@ -278,12 +269,6 @@ def start_bigchaindb(): sudo('screen -d -m bigchaindb -y start &', pty=False) -@task -@parallel -def start_bigchaindb_load(): - sudo('screen -d -m bigchaindb load &', pty=False) - - # Install and run New Relic @task @parallel diff --git a/deploy-cluster-aws/launch_ec2_nodes.py b/deploy-cluster-aws/launch_ec2_nodes.py index e02b7b62..1c50f895 100644 --- a/deploy-cluster-aws/launch_ec2_nodes.py +++ b/deploy-cluster-aws/launch_ec2_nodes.py @@ -26,7 +26,7 @@ import boto3 from awscommon import get_naeips -SETTINGS = ['NUM_NODES', 'BRANCH', 'WHAT_TO_DEPLOY', 'SSH_KEY_NAME', +SETTINGS = ['NUM_NODES', 'BRANCH', 'SSH_KEY_NAME', 'USE_KEYPAIRS_FILE', 'IMAGE_ID', 'INSTANCE_TYPE', 'SECURITY_GROUP', 'USING_EBS', 'EBS_VOLUME_SIZE', 'EBS_OPTIMIZED', 'ENABLE_WEB_ADMIN', 'BIND_HTTP_TO_LOCALHOST'] @@ -77,9 +77,6 @@ if not isinstance(NUM_NODES, int): if not isinstance(BRANCH, str): raise SettingsTypeError('BRANCH should be a string') -if not isinstance(WHAT_TO_DEPLOY, str): - raise SettingsTypeError('WHAT_TO_DEPLOY should be a string') - if not isinstance(SSH_KEY_NAME, str): raise SettingsTypeError('SSH_KEY_NAME should be a string') @@ -117,11 +114,6 @@ if NUM_NODES > 64: 'The AWS deployment configuration file sets it to {}'. format(NUM_NODES)) -if WHAT_TO_DEPLOY not in ['servers', 'clients']: - raise ValueError('WHAT_TO_DEPLOY should be either "servers" or "clients". ' - 'The AWS deployment configuration file sets it to {}'. - format(WHAT_TO_DEPLOY)) - if SSH_KEY_NAME in ['not-set-yet', '', None]: raise ValueError('SSH_KEY_NAME should be set. ' 'The AWS deployment configuration file sets it to {}'. @@ -298,7 +290,7 @@ print('Writing hostlist.py') with open('hostlist.py', 'w') as f: f.write('# -*- coding: utf-8 -*-\n') f.write('"""A list of the public DNS names of all the nodes in this\n') - f.write('BigchainDB cluster/federation.\n') + f.write('BigchainDB cluster.\n') f.write('"""\n') f.write('\n') f.write('from __future__ import unicode_literals\n') diff --git a/docker-compose.yml b/docker-compose.yml index f5dbcdc9..322cbcf6 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -2,7 +2,7 @@ version: '2' services: mdb: - image: mongo:3.4.1 + image: mongo:3.4.3 ports: - "27017" command: mongod --replSet=bigchain-rs @@ -28,7 +28,7 @@ services: - /data command: "true" - bdb: + bdb-rdb: build: context: . dockerfile: Dockerfile-dev @@ -50,7 +50,7 @@ services: - "9984" command: bigchaindb start - bdb-mdb: + bdb: build: context: . 
       dockerfile: Dockerfile-dev
diff --git a/docs/root/source/assets.rst b/docs/root/source/assets.rst
index 50b8ad25..14982406 100644
--- a/docs/root/source/assets.rst
+++ b/docs/root/source/assets.rst
@@ -3,7 +3,7 @@ How BigchainDB is Good for Asset Registrations & Transfers
 
 BigchainDB can store data of any kind (within reason), but it's designed to be particularly good for storing asset registrations and transfers:
 
-* The fundamental thing that one submits to a BigchainDB federation to be checked and stored (if valid) is a *transaction*, and there are two kinds: CREATE transactions and TRANSFER transactions.
+* The fundamental thing that one sends to a BigchainDB cluster, to be checked and stored (if valid), is a *transaction*, and there are two kinds: CREATE transactions and TRANSFER transactions.
 * A CREATE transaction can be used to register any kind of asset (divisible or indivisible), along with arbitrary metadata.
 * An asset can have zero, one, or several owners.
 * The owners of an asset can specify (crypto-)conditions which must be satisfied by anyone wishing to transfer the asset to new owners. For example, a condition might be that at least 3 of the 5 current owners must cryptographically sign a transfer transaction.
diff --git a/docs/root/source/conf.py b/docs/root/source/conf.py
index 50dec3ea..0d799fed 100644
--- a/docs/root/source/conf.py
+++ b/docs/root/source/conf.py
@@ -58,7 +58,7 @@ master_doc = 'index'
 
 # General information about the project.
 project = 'BigchainDB'
-copyright = '2016, BigchainDB Contributors'
+copyright = '2017, BigchainDB Contributors'
 author = 'BigchainDB Contributors'
 
 # The version info for the project you're documenting, acts as replacement for
diff --git a/docs/root/source/decentralized.md b/docs/root/source/decentralized.md
index 7f0b8e95..3b82ae46 100644
--- a/docs/root/source/decentralized.md
+++ b/docs/root/source/decentralized.md
@@ -4,18 +4,18 @@ Decentralization means that no one owns or controls everything, and there is no
 
 Ideally, each node in a BigchainDB cluster is owned and controlled by a different person or organization. Even if the cluster lives within one organization, it's still preferable to have each node controlled by a different person or subdivision.
 
-We use the phrase "BigchainDB federation" (or just "federation") to refer to the set of people and/or organizations who run the nodes of a BigchainDB cluster. A federation requires some form of governance to make decisions such as membership and policies. The exact details of the governance process are determined by each federation, but it can be very decentralized (e.g. purely vote-based, where each node gets a vote, and there are no special leadership roles).
+We use the phrase "BigchainDB consortium" (or just "consortium") to refer to the set of people and/or organizations who run the nodes of a BigchainDB cluster. A consortium requires some form of governance to make decisions such as membership and policies. The exact details of the governance process are determined by each consortium, but it can be very decentralized (e.g. purely vote-based, where each node gets a vote, and there are no special leadership roles).
 
-The actual data is decentralized in that it doesn’t all get stored in one place. Each federation node stores the primary of one shard and replicas of some other shards. (A shard is a subset of the total set of documents.) Sharding and replication are handled by RethinkDB.
+If sharding is turned on (i.e. if the number of shards is larger than one), then the actual data is decentralized in that no one node stores all the data.
 
-Every node has its own locally-stored list of the public keys of other federation members: the so-called keyring. There's no centrally-stored or centrally-shared keyring.
+Every node has its own locally-stored list of the public keys of other consortium members: the so-called keyring. There's no centrally-stored or centrally-shared keyring.
 
-A federation can increase its decentralization (and its resilience) by increasing its jurisdictional diversity, geographic diversity, and other kinds of diversity. This idea is expanded upon in [the section on node diversity](diversity.html).
+A consortium can increase its decentralization (and its resilience) by increasing its jurisdictional diversity, geographic diversity, and other kinds of diversity. This idea is expanded upon in [the section on node diversity](diversity.html).
 
-There’s no node that has a long-term special position in the federation. All nodes run the same software and perform the same duties.
+There’s no node that has a long-term special position in the cluster. All nodes run the same software and perform the same duties.
 
-RethinkDB has an “admin” user which can’t be deleted and which can make big changes to the database, such as dropping a table. Right now, that’s a big security vulnerability, but we have plans to mitigate it by:
+RethinkDB and MongoDB have an “admin” user which can’t be deleted and which can make big changes to the database, such as dropping a table. Right now, that’s a big security vulnerability, but we have plans to mitigate it by:
 
 1. Locking down the admin user as much as possible.
-2. Having all nodes inspect RethinkDB admin-type requests before acting on them. Requests can be checked against an evolving whitelist of allowed actions (voted on by federation nodes).
+2. Having all nodes inspect admin-type requests before acting on them. Requests can be checked against an evolving whitelist of allowed actions. Nodes making non-allowed requests can be removed from the list of cluster nodes.
 
 It’s worth noting that the RethinkDB admin user can’t transfer assets, even today. The only way to create a valid transfer transaction is to fulfill the current (crypto) conditions on the asset, and the admin user can’t do that because the admin user doesn’t have the necessary private keys (or preimages, in the case of hashlock conditions). They’re not stored in the database.
diff --git a/docs/root/source/diversity.md b/docs/root/source/diversity.md
index 4819a0af..20c9afb5 100644
--- a/docs/root/source/diversity.md
+++ b/docs/root/source/diversity.md
@@ -6,6 +6,6 @@ Steps should be taken to make it difficult for any one actor or event to control
 2. **Geographic diversity.** The servers should be physically located at multiple geographic locations, so that it becomes difficult for a natural disaster (such as a flood or earthquake) to damage enough of them to cause problems.
 3. **Hosting diversity.** The servers should be hosted by multiple hosting providers (e.g. Amazon Web Services, Microsoft Azure, Digital Ocean, Rackspace), so that it becomes difficult for one hosting provider to influence enough of the nodes.
 4. **Operating system diversity.** The servers should use a variety of operating systems, so that a security bug in one OS can’t be used to exploit enough of the nodes.
-5. **Diversity in general.** In general, membership diversity (of all kinds) confers many advantages on a federation. For example, it provides the federation with a source of various ideas for addressing challenges.
+5. **Diversity in general.** In general, membership diversity (of all kinds) confers many advantages on a consortium. For example, it provides the consortium with a source of various ideas for addressing challenges.
 
-Note: If all the nodes are running the same code, i.e. the same implementation of BigchainDB, then a bug in that code could be used to compromise all of the nodes. Ideally, there would be several different, well-maintained implementations of BigchainDB Server (e.g. one in Python, one in Go, etc.), so that a federation could also have a diversity of server implementations.
+Note: If all the nodes are running the same code, i.e. the same implementation of BigchainDB, then a bug in that code could be used to compromise all of the nodes. Ideally, there would be several different, well-maintained implementations of BigchainDB Server (e.g. one in Python, one in Go, etc.), so that a consortium could also have a diversity of server implementations.
diff --git a/docs/root/source/immutable.md b/docs/root/source/immutable.md
index 28fb5999..a20c40b8 100644
--- a/docs/root/source/immutable.md
+++ b/docs/root/source/immutable.md
@@ -8,12 +8,12 @@ It’s true that blockchain data is more difficult to change than usual: it’s
 
 BigchainDB achieves strong tamper-resistance in the following ways:
 
-1. **Replication.** All data is sharded and shards are replicated in several (different) places. The replication factor can be set by the federation. The higher the replication factor, the more difficult it becomes to change or delete all replicas.
+1. **Replication.** All data is sharded and shards are replicated in several (different) places. The replication factor can be set by the consortium. The higher the replication factor, the more difficult it becomes to change or delete all replicas.
 2. **Internal watchdogs.** All nodes monitor all changes and if some unallowed change happens, then appropriate action is taken. For example, if a valid block is deleted, then it is put back.
-3. **External watchdogs.** Federations may opt to have trusted third-parties to monitor and audit their data, looking for irregularities. For federations with publicly-readable data, the public can act as an auditor.
+3. **External watchdogs.** A consortium may opt to have trusted third parties monitor and audit its data, looking for irregularities. For a consortium with publicly-readable data, the public can act as an auditor.
 4. **Cryptographic signatures** are used throughout BigchainDB as a way to check if messages (transactions, blocks and votes) have been tampered with en route, and as a way to verify who signed the messages. Each block is signed by the node that created it. Each vote is signed by the node that cast it. A creation transaction is signed by the node that created it, although there are plans to improve that by adding signatures from the sending client and multiple nodes; see [Issue #347](https://github.com/bigchaindb/bigchaindb/issues/347). Transfer transactions can contain multiple inputs (fulfillments, one per asset transferred). Each fulfillment will typically contain one or more signatures from the owners (i.e. the owners before the transfer). Hashlock fulfillments are an exception; there’s an open issue ([#339](https://github.com/bigchaindb/bigchaindb/issues/339)) to address that.
 5. **Full or partial backups** of the database may be recorded from time to time, possibly on magnetic tape storage, other blockchains, printouts, etc.
 6. **Strong security.** Node owners can adopt and enforce strong security policies.
 7. **Node diversity.** Diversity makes it so that no one thing (e.g. natural disaster or operating system bug) can compromise enough of the nodes. See [the section on the kinds of node diversity](diversity.html).
 
-Some of these things come "for free" as part of the BigchainDB software, and others require some extra effort from the federation and node owners.
+Some of these things come "for free" as part of the BigchainDB software, and others require some extra effort from the consortium and node owners.
diff --git a/docs/root/source/index.rst b/docs/root/source/index.rst
index 003d07b3..1dd71003 100644
--- a/docs/root/source/index.rst
+++ b/docs/root/source/index.rst
@@ -53,7 +53,7 @@ At a high level, one can communicate with a BigchainDB cluster (set of nodes) us
Python Driver Docs diff --git a/docs/root/source/terminology.md b/docs/root/source/terminology.md index fb2a3bdf..66375b38 100644 --- a/docs/root/source/terminology.md +++ b/docs/root/source/terminology.md @@ -1,22 +1,22 @@ # Terminology -There is some specialized terminology associated with BigchainDB. To get started, you should at least know what what we mean by a BigchainDB *node*, *cluster* and *federation*. +There is some specialized terminology associated with BigchainDB. To get started, you should at least know the following: -## Node +## BigchainDB Node -A **BigchainDB node** is a machine or set of closely-linked machines running RethinkDB Server, BigchainDB Server, and related software. (A "machine" might be a bare-metal server, a virtual machine or a container.) Each node is controlled by one person or organization. +A **BigchainDB node** is a machine or set of closely-linked machines running RethinkDB/MongoDB Server, BigchainDB Server, and related software. Each node is controlled by one person or organization. -## Cluster +## BigchainDB Cluster -A set of BigchainDB nodes can connect to each other to form a **cluster**. Each node in the cluster runs the same software. A cluster contains one logical RethinkDB datastore. A cluster may have additional machines to do things such as cluster monitoring. +A set of BigchainDB nodes can connect to each other to form a **BigchainDB cluster**. Each node in the cluster runs the same software. A cluster contains one logical RethinkDB/MongoDB datastore. A cluster may have additional machines to do things such as cluster monitoring. -## Federation +## BigchainDB Consortium -The people and organizations that run the nodes in a cluster belong to a **federation** (i.e. another organization). A federation must have some sort of governance structure to make decisions. If a cluster is run by a single company, then the federation is just that company. +The people and organizations that run the nodes in a cluster belong to a **BigchainDB consortium** (i.e. another organization). A consortium must have some sort of governance structure to make decisions. If a cluster is run by a single company, then the "consortium" is just that company. -**What's the Difference Between a Cluster and a Federation?** +**What's the Difference Between a Cluster and a Consortium?** -A cluster is just a bunch of connected nodes. A federation is an organization which has a cluster, and where each node in the cluster has a different operator. Confusingly, we sometimes call a federation's cluster its "federation." You can probably tell what we mean from context. \ No newline at end of file +A cluster is just a bunch of connected nodes. A consortium is an organization which has a cluster, and where each node in the cluster has a different operator. 
\ No newline at end of file
diff --git a/docs/server/generate_http_server_api_documentation.py b/docs/server/generate_http_server_api_documentation.py
index ba082ba3..731bee2c 100644
--- a/docs/server/generate_http_server_api_documentation.py
+++ b/docs/server/generate_http_server_api_documentation.py
@@ -269,7 +269,7 @@ def main():
     ctx['block_list'] = pretty_json(block_list)
 
     base_path = os.path.join(os.path.dirname(__file__),
-                             'source/drivers-clients/samples')
+                             'source/http-samples')
 
     if not os.path.exists(base_path):
         os.makedirs(base_path)
diff --git a/docs/server/source/_static/Node-components.png b/docs/server/source/_static/Node-components.png
index cd086073..326b6af6 100644
Binary files a/docs/server/source/_static/Node-components.png and b/docs/server/source/_static/Node-components.png differ
diff --git a/docs/server/source/appendices/aws-setup.md b/docs/server/source/appendices/aws-setup.md
index 0471f8af..793f4d36 100644
--- a/docs/server/source/appendices/aws-setup.md
+++ b/docs/server/source/appendices/aws-setup.md
@@ -18,7 +18,7 @@ pip install awscli
 
 ## Create an AWS Access Key
 
-The next thing you'll need is an AWS access key. If you don't have one, you can create one using the [instructions in the AWS documentation](http://docs.aws.amazon.com/AWSSimpleQueueService/latest/SQSGettingStartedGuide/AWSCredentials.html). You should get an access key ID (e.g. AKIAIOSFODNN7EXAMPLE) and a secret access key (e.g. wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY).
+The next thing you'll need is AWS access keys (access key ID and secret access key). If you don't have those, see [the AWS documentation about access keys](https://docs.aws.amazon.com/general/latest/gr/aws-sec-cred-types.html#access-keys-and-secret-access-keys).
 
 You should also pick a default AWS region name (e.g. `eu-central-1`). That's where your cluster will run. The AWS documentation has [a list of them](http://docs.aws.amazon.com/general/latest/gr/rande.html#ec2_region).
 
@@ -42,23 +42,10 @@ This writes two files: `~/.aws/credentials` and `~/.aws/config`. AWS tools and p
 
 Eventually, you'll have one or more instances (virtual machines) running on AWS and you'll want to SSH to them. To do that, you need a public/private key pair. The public key will be sent to AWS, and you can tell AWS to put it in any instances you provision there. You'll keep the private key on your local workstation.
 
-First you need to make up a key name. Some ideas:
+See the [page about how to generate a key pair for SSH](generate-key-pair-for-ssh.html).
 
-* `bcdb-troy-1`
-* `bigchaindb-7`
-* `bcdb-jupiter`
 
-If you already have key pairs on AWS (Amazon EC2), you have to pick a name that's not already being used.
-Below, replace every instance of `<key-name>` with your actual key name.
-To generate a public/private RSA key pair with that name:
-```text
-ssh-keygen -t rsa -C "<key-name>" -f ~/.ssh/<key-name>
-```
-
-It will ask you for a passphrase. You can use whatever passphrase you like, but don't lose it. Two keys (files) will be created in `~/.ssh/`:
-
-1. `~/.ssh/<key-name>.pub` is the public key
-2. `~/.ssh/<key-name>` is the private key
+## Send the Public Key to AWS
 
 To send the public key to AWS, use the AWS Command-Line Interface:
 ```text
diff --git a/docs/server/source/appendices/commands.rst b/docs/server/source/appendices/commands.rst
new file mode 100644
index 00000000..460145f4
--- /dev/null
+++ b/docs/server/source/appendices/commands.rst
@@ -0,0 +1,18 @@
+######################
+Command Line Interface
+######################
+
+.. automodule:: bigchaindb.commands
+   :special-members: __init__
+
+
+:mod:`bigchaindb.commands.bigchaindb`
+-------------------------------------
+
+.. automodule:: bigchaindb.commands.bigchaindb
+
+
+:mod:`bigchaindb.commands.utils`
+--------------------------------
+
+.. automodule:: bigchaindb.commands.utils
diff --git a/docs/server/source/appendices/consensus.rst b/docs/server/source/appendices/consensus.rst
deleted file mode 100644
index 34c0c032..00000000
--- a/docs/server/source/appendices/consensus.rst
+++ /dev/null
@@ -1,5 +0,0 @@
-#########
-Consensus
-#########
-
-.. automodule:: bigchaindb.consensus
diff --git a/docs/server/source/appendices/docker-on-mac.md b/docs/server/source/appendices/docker-on-mac.md
new file mode 100644
index 00000000..7f87540f
--- /dev/null
+++ b/docs/server/source/appendices/docker-on-mac.md
@@ -0,0 +1,101 @@
+# Run BigchainDB with Docker On Mac
+
+**NOT for Production Use**
+
+Those developing on Mac can follow this document to run BigchainDB in Docker
+containers for a quick dev setup.
+Running BigchainDB on Mac (Docker or otherwise) is not officially supported.
+
+Support is very limited, as certain things work differently in Docker for Mac
+than in Docker for other platforms.
+Also, we do not use Mac for our development and testing. :)
+
+This page may not be up to date with the various settings and Docker updates
+at all times.
+
+These steps work as of this writing (2017.Mar.09) and might break in the
+future with updates to Docker for Mac.
+Community contributions to make BigchainDB run on Docker for Mac are always
+welcome.
+
+
+## Prerequisite
+
+Install Docker for Mac.
+
+## (Optional) For a clean start
+
+1. Stop all BigchainDB and RethinkDB/MongoDB containers.
+2. Delete all BigchainDB Docker images.
+3. Delete the ~/bigchaindb_docker folder.
+
+
+## Pull the images
+
+Pull the BigchainDB and other required Docker images from Docker Hub.
+
+```text
+docker pull bigchaindb/bigchaindb:master
+docker pull [rethinkdb:2.3|mongo:3.4.1]
+```
+
+## Create the BigchainDB configuration file on Mac
+```text
+docker run \
+  --rm \
+  --volume $HOME/bigchaindb_docker:/data \
+  bigchaindb/bigchaindb:master \
+  -y configure \
+  [mongodb|rethinkdb]
+```
+
+To ensure that BigchainDB connects to the backend database bound to the virtual
+interface `172.17.0.1`, you must edit the BigchainDB configuration file
+(`~/bigchaindb_docker/.bigchaindb`) and change database.host from `localhost`
+to `172.17.0.1`.
+
+
+## Run the backend database on Mac
+
+From v0.9 onwards, you can run RethinkDB or MongoDB.
+
+We use the virtual interface created by the Docker daemon to allow
+communication between the BigchainDB and database containers.
+It has an IP address of 172.17.0.1 by default.
+
+You can also use Docker host networking or bind to your primary (eth)
+interface, if needed.
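An aside, assuming the default bridge IP above: once the database container from the next section is running, you can confirm from the host that BigchainDB will be able to reach it on `172.17.0.1`. A minimal sketch using only the Python standard library (port 27017 for MongoDB; use 28015 for RethinkDB):

```python
# Check that the database container is reachable on the Docker bridge IP
# before starting BigchainDB. Host/port values match the docs above.
import socket


def can_connect(host='172.17.0.1', port=27017, timeout=3):
    """Return True if a TCP connection to host:port succeeds."""
    try:
        with socket.create_connection((host, port), timeout=timeout):
            return True
    except OSError:
        return False


if __name__ == '__main__':
    print(can_connect())  # True once the database container is up
```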
+ +### For RethinkDB backend +```text +docker run \ + --name=rethinkdb \ + --publish=28015:28015 \ + --publish=8080:8080 \ + --restart=always \ + --volume $HOME/bigchaindb_docker:/data \ + rethinkdb:2.3 +``` + +### For MongoDB backend +```text +docker run \ + --name=mongodb \ + --publish=27017:27017 \ + --restart=always \ + --volume=$HOME/bigchaindb_docker/db:/data/db \ + --volume=$HOME/bigchaindb_docker/configdb:/data/configdb \ + mongo:3.4.1 --replSet=bigchain-rs +``` + +### Run BigchainDB on Mac +```text +docker run \ + --name=bigchaindb \ + --publish=9984:9984 \ + --restart=always \ + --volume=$HOME/bigchaindb_docker:/data \ + bigchaindb/bigchaindb \ + start +``` + diff --git a/docs/server/source/appendices/example-rethinkdb-storage-setups.md b/docs/server/source/appendices/example-rethinkdb-storage-setups.md deleted file mode 100755 index 0fc4c273..00000000 --- a/docs/server/source/appendices/example-rethinkdb-storage-setups.md +++ /dev/null @@ -1,25 +0,0 @@ -# Example RethinkDB Storage Setups - -## Example Amazon EC2 Setups - -We have some scripts for [deploying a _test_ BigchainDB cluster on AWS](../clusters-feds/aws-testing-cluster.html). Those scripts include command sequences to set up storage for RethinkDB. -In particular, look in the file [/deploy-cluster-aws/fabfile.py](https://github.com/bigchaindb/bigchaindb/blob/master/deploy-cluster-aws/fabfile.py), under `def prep_rethinkdb_storage(USING_EBS)`. Note that there are two cases: - -1. **Using EBS ([Amazon Elastic Block Store](https://aws.amazon.com/ebs/)).** This is always an option, and for some instance types ("EBS-only"), it's the only option. -2. **Using an "instance store" volume provided with an Amazon EC2 instance.** Note that our scripts only use one of the (possibly many) volumes in the instance store. - -There's some explanation of the steps in the [Amazon EC2 documentation about making an Amazon EBS volume available for use](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-using-volumes.html). - -You shouldn't use an EC2 "instance store" to store RethinkDB data for a production node, because it's not replicated and it's only intended for temporary, ephemeral data. If the associated instance crashes, is stopped, or is terminated, the data in the instance store is lost forever. Amazon EBS storage is replicated, has incremental snapshots, and is low-latency. - - -## Example Using Amazon EFS - -TODO - - -## Other Examples? - -TODO - -Maybe RAID, ZFS, ... (over EBS volumes, i.e. a DIY Amazon EFS) diff --git a/docs/server/source/appendices/firewall-notes.md b/docs/server/source/appendices/firewall-notes.md index cd440774..b7af6c22 100644 --- a/docs/server/source/appendices/firewall-notes.md +++ b/docs/server/source/appendices/firewall-notes.md @@ -8,9 +8,10 @@ This is a page of notes on the ports potentially used by BigchainDB nodes and th Assuming you aren't exposing the RethinkDB web interface on port 8080 (or any other port, because [there are more secure ways to access it](https://www.rethinkdb.com/docs/security/#binding-the-web-interface-port)), there are only three ports that should expect unsolicited inbound traffic: 1. **Port 22** can expect inbound SSH (TCP) traffic from the node administrator (i.e. a small set of IP addresses). -2. **Port 9984** can expect inbound HTTP (TCP) traffic from BigchainDB clients sending transactions to the BigchainDB HTTP API. -3. 
If you're using RethinkDB, **Port 29015** can expect inbound TCP traffic from other RethinkDB nodes in the RethinkDB cluster (for RethinkDB intracluster communications).
-4. If you're using MongoDB, **Port 27017** can expect inbound TCP traffic from other nodes.
+1. **Port 9984** can expect inbound HTTP (TCP) traffic from BigchainDB clients sending transactions to the BigchainDB HTTP API.
+1. **Port 9985** can expect inbound WebSocket traffic from BigchainDB clients.
+1. If you're using RethinkDB, **Port 29015** can expect inbound TCP traffic from other RethinkDB nodes in the RethinkDB cluster (for RethinkDB intracluster communications).
+1. If you're using MongoDB, **Port 27017** can expect inbound TCP traffic from other nodes.
 
 All other ports should only get inbound traffic in response to specific requests from inside the node.
 
@@ -59,6 +60,11 @@ If Gunicorn and the reverse proxy are running on the same server, then you'll ha
 
 You may want to have Gunicorn and the reverse proxy running on different servers, so that both can listen on port 9984. That would also help isolate the effects of a denial-of-service attack.
 
+## Port 9985
+
+Port 9985 is the default port for the [BigchainDB WebSocket Event Stream API](../websocket-event-stream-api.html).
+
+
 ## Port 28015
 
 Port 28015 is the default port used by RethinkDB client driver connections (TCP). If your BigchainDB node is just one server, then Port 28015 only needs to listen on localhost, because all the client drivers will be running on localhost. Port 28015 doesn't need to accept inbound traffic from the outside world.
diff --git a/docs/server/source/appendices/generate-key-pair-for-ssh.md b/docs/server/source/appendices/generate-key-pair-for-ssh.md
new file mode 100644
index 00000000..18b19392
--- /dev/null
+++ b/docs/server/source/appendices/generate-key-pair-for-ssh.md
@@ -0,0 +1,34 @@
+# Generate a Key Pair for SSH
+
+This page describes how to use `ssh-keygen`
+to generate a public/private RSA key pair
+that can be used with SSH.
+(Note: `ssh-keygen` is found on most Linux and Unix-like
+operating systems; if you're using Windows,
+then you'll have to use another tool,
+such as PuTTYgen.)
+
+By convention, SSH key pairs get stored in the `~/.ssh/` directory.
+Check what keys you already have there:
+```text
+ls -1 ~/.ssh/
+```
+
+Next, make up a new key pair name (called `<key-name>` below).
+Here are some ideas:
+
+* `aws-bdb-2`
+* `tim-bdb-azure`
+* `chris-bcdb-key`
+
+Next, generate a public/private RSA key pair with that name:
+```text
+ssh-keygen -t rsa -C "<key-name>" -f ~/.ssh/<key-name>
+```
+
+It will ask you for a passphrase.
+You can use whatever passphrase you like, but don't lose it.
+Two keys (files) will be created in `~/.ssh/`:
+
+1. `~/.ssh/<key-name>.pub` is the public key
+2. `~/.ssh/<key-name>` is the private key
diff --git a/docs/server/source/appendices/index.rst b/docs/server/source/appendices/index.rst
index 41b742b9..1c969c05 100755
--- a/docs/server/source/appendices/index.rst
+++ b/docs/server/source/appendices/index.rst
@@ -10,15 +10,18 @@ Appendices
    install-os-level-deps
    install-latest-pip
    run-with-docker
+   docker-on-mac
    json-serialization
    cryptography
    the-Bigchain-class
-   consensus
    pipelines
    backend
+   commands
    aws-setup
+   generate-key-pair-for-ssh
    firewall-notes
    ntp-notes
-   example-rethinkdb-storage-setups
+   rethinkdb-reqs
+   rethinkdb-backup
    licenses
    install-with-lxd
diff --git a/docs/server/source/appendices/json-serialization.md b/docs/server/source/appendices/json-serialization.md
index c2d03f6e..8322b2de 100644
--- a/docs/server/source/appendices/json-serialization.md
+++ b/docs/server/source/appendices/json-serialization.md
@@ -24,7 +24,7 @@ deserialize(serialize(data)) == data
 True
 ```
 
-Since BigchainDB performs a lot of serialization we decided to use [python-rapidjson](https://github.com/kenrobbins/python-rapidjson)
+Since BigchainDB performs a lot of serialization we decided to use [python-rapidjson](https://github.com/python-rapidjson/python-rapidjson)
 which is a Python wrapper for [rapidjson](https://github.com/miloyip/rapidjson), a fast and fully RFC-compliant JSON parser.
 
 ```python
diff --git a/docs/server/source/clusters-feds/backup.md b/docs/server/source/appendices/rethinkdb-backup.md
similarity index 94%
rename from docs/server/source/clusters-feds/backup.md
rename to docs/server/source/appendices/rethinkdb-backup.md
index 93fd9aac..732323ed 100644
--- a/docs/server/source/clusters-feds/backup.md
+++ b/docs/server/source/appendices/rethinkdb-backup.md
@@ -1,6 +1,6 @@
-# Backing Up & Restoring Data
+# Backing Up and Restoring Data
 
-There are several ways to backup and restore the data in a BigchainDB cluster.
+This page was written when BigchainDB only worked with RethinkDB, so its focus is on RethinkDB-based backup. BigchainDB now supports MongoDB as a backend database and we recommend that you use MongoDB in production. Nevertheless, some of the following backup ideas are still relevant regardless of the backend database being used, so we moved this page to the Appendices.
 
 ## RethinkDB's Replication as a form of Backup
 
@@ -64,7 +64,7 @@ In the future, it will be possible for clients to query for the blocks containin
 
 **How could we be sure blocks and votes from a client are valid?**
 
-All blocks and votes are signed by federation nodes. Only federation nodes can produce valid signatures because only federation nodes have the necessary private keys. A client can't produce a valid signature for a block or vote.
+All blocks and votes are signed by cluster nodes (owned and operated by consortium members). Only cluster nodes can produce valid signatures because only cluster nodes have the necessary private keys. A client can't produce a valid signature for a block or vote.
 
 **Could we restore an entire BigchainDB database using client-saved blocks and votes?**
 
@@ -109,7 +109,7 @@ Considerations for BigchainDB:
 
 Although it's not advertised as such, RethinkDB's built-in replication feature is similar to continuous backup, except the "backup" (i.e. the set of replica shards) is spread across all the nodes. One could take that idea a bit farther by creating a set of backup-only servers with one full backup:
 
 * Give all the original BigchainDB nodes (RethinkDB nodes) the server tag `original`.
This is the default if you used the RethinkDB config file suggested in the section titled [Configure RethinkDB Server](../dev-and-test/setup-run-node.html#configure-rethinkdb-server). -* Set up a group of servers running RethinkDB only, and give them the server tag `backup`. The `backup` servers could be geographically separated from all the `original` nodes (or not; it's up to the federation). +* Set up a group of servers running RethinkDB only, and give them the server tag `backup`. The `backup` servers could be geographically separated from all the `original` nodes (or not; it's up to the consortium to decide). * Clients shouldn't be able to read from or write to servers in the `backup` set. * Send a RethinkDB reconfigure command to the RethinkDB cluster to make it so that the `original` set has the same number of replicas as before (or maybe one less), and the `backup` set has one replica. Also, make sure the `primary_replica_tag='original'` so that all primary shards live on the `original` nodes. diff --git a/docs/server/source/nodes/node-requirements.md b/docs/server/source/appendices/rethinkdb-reqs.md similarity index 58% rename from docs/server/source/nodes/node-requirements.md rename to docs/server/source/appendices/rethinkdb-reqs.md index 56d52f13..0d3468b7 100644 --- a/docs/server/source/nodes/node-requirements.md +++ b/docs/server/source/appendices/rethinkdb-reqs.md @@ -1,20 +1,8 @@ -# Production Node Requirements +# RethinkDB Requirements -Note: This section will be broken apart into several pages, e.g. NTP requirements, RethinkDB requirements, BigchainDB requirements, etc. and those pages will add more details. +[The RethinkDB documentation](https://rethinkdb.com/docs/) should be your first source of information about its requirements. This page serves mostly to document some of its more obscure requirements. - -## OS Requirements - -* RethinkDB Server [will run on any modern OS](https://www.rethinkdb.com/docs/install/). Note that the Fedora package isn't officially supported. Also, official support for Windows is fairly recent ([April 2016](https://rethinkdb.com/blog/2.3-release/)). -* BigchainDB Server requires Python 3.4+ and Python 3.4+ [will run on any modern OS](https://docs.python.org/3.4/using/index.html). -* BigchaindB Server uses the Python `multiprocessing` package and [some functionality in the `multiprocessing` package doesn't work on OS X](https://docs.python.org/3.4/library/multiprocessing.html#multiprocessing.Queue.qsize). You can still use Mac OS X if you use Docker or a virtual machine. - -The BigchainDB core dev team uses recent LTS versions of Ubuntu and recent versions of Fedora. - -We don't test BigchainDB on Windows or Mac OS X, but you can try. - -* If you run into problems on Windows, then you may want to try using Vagrant. One of our community members ([@Mec-Is](https://github.com/Mec-iS)) wrote [a page about how to install BigchainDB on a VM with Vagrant](https://gist.github.com/Mec-iS/b84758397f1b21f21700). -* If you have Mac OS X and want to experiment with BigchainDB, then you could do that [using Docker](../appendices/run-with-docker.html). +RethinkDB Server [will run on any modern OS](https://www.rethinkdb.com/docs/install/). Note that the Fedora package isn't officially supported. Also, official support for Windows is fairly recent ([April 2016](https://rethinkdb.com/blog/2.3-release/)). 
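An aside tying this back to the backup notes above: the RethinkDB reconfigure command mentioned there can be issued from the RethinkDB Python driver. A minimal sketch, assuming a database and table both named `bigchain` and the `original`/`backup` server tags described above; the shard and replica counts are placeholders:

```python
# Sketch: keep all primary shards on servers tagged 'original' while
# placing one replica on the 'backup' servers. Names and counts here
# are placeholders; adjust them to your cluster.
import rethinkdb as r

conn = r.connect('localhost', 28015)
result = r.db('bigchain').table('bigchain').reconfigure(
    shards=2,
    replicas={'original': 3, 'backup': 1},
    primary_replica_tag='original',
).run(conn)
print(result)
```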
## Storage Requirements @@ -28,6 +16,20 @@ For RethinkDB's failover mechanisms to work, [every RethinkDB table must have at As for the read & write rates, what do you expect those to be for your situation? It's not enough for the storage system alone to handle those rates: the interconnects between the nodes must also be able to handle them. +**Storage Notes Specific to RethinkDB** + +* The RethinkDB storage engine has a number of SSD optimizations, so you _can_ benefit from using SSDs. ([source](https://www.rethinkdb.com/docs/architecture/)) + +* If you have an N-node RethinkDB cluster and 1) you want to use it to store an amount of data D (unique records, before replication), 2) you want the replication factor to be R (all tables), and 3) you want N shards (all tables), then each BigchainDB node must have storage space of at least R×D/N. + +* RethinkDB tables can have [at most 64 shards](https://rethinkdb.com/limitations/). What does that imply? Suppose you only have one table, with 64 shards. How big could that table be? It depends on how much data can be stored in each node. If the maximum amount of data that a node can store is d, then the biggest-possible shard is d, and the biggest-possible table size is 64 times that. (All shard replicas would have to be stored on other nodes beyond the initial 64.) If there are two tables, the second table could also have 64 shards, stored on 64 other maxed-out nodes, so the total amount of unique data in the database would be (64 shards/table)×(2 tables)×d. In general, if you have T tables, the maximum amount of unique data that can be stored in the database (i.e. the amount of data before replication) is 64×T×d. + +* When you set up storage for your RethinkDB data, you may have to select a filesystem. (Sometimes, the filesystem is already decided by the choice of storage.) We recommend using a filesystem that supports direct I/O (Input/Output). Many compressed or encrypted file systems don't support direct I/O. The ext4 filesystem supports direct I/O (but be careful: if you enable the data=journal mode, then direct I/O support will be disabled; the default is data=ordered). If your chosen filesystem supports direct I/O and you're using Linux, then you don't need to do anything to request or enable direct I/O. RethinkDB does that. + +
What is direct I/O? It allows RethinkDB to write directly to the storage device (or use its own in-memory caching mechanisms), rather than relying on the operating system's file read and write caching mechanisms. (If you're using Linux, a write-to-file normally writes to the in-memory Page Cache first; only later does that Page Cache get flushed to disk. The Page Cache is also used when reading files.)
+ +* RethinkDB stores its data in a specific directory. You can tell RethinkDB _which_ directory using the RethinkDB config file, as explained below. In this documentation, we assume the directory is `/data`. If you set up a separate device (partition, RAID array, or logical volume) to store the RethinkDB data, then mount that device on `/data`. + ## Memory (RAM) Requirements diff --git a/docs/server/source/appendices/run-with-docker.md b/docs/server/source/appendices/run-with-docker.md index 6c1d2ce0..fef0e638 100644 --- a/docs/server/source/appendices/run-with-docker.md +++ b/docs/server/source/appendices/run-with-docker.md @@ -25,7 +25,7 @@ docker run \ --interactive \ --rm \ --tty \ - --volume "$HOME/bigchaindb_docker:/data" \ + --volume $HOME/bigchaindb_docker:/data \ bigchaindb/bigchaindb \ -y configure \ [mongodb|rethinkdb] @@ -45,7 +45,7 @@ Let's analyze that command: `$HOME/bigchaindb_docker` to the container directory `/data`; this allows us to have the data persisted on the host machine, you can read more in the [official Docker - documentation](https://docs.docker.com/engine/tutorials/dockervolumes/#/mount-a-host-directory-as-a-data-volume) + documentation](https://docs.docker.com/engine/tutorials/dockervolumes) * `bigchaindb/bigchaindb` the image to use. All the options after the container name are passed on to the entrypoint inside the container. * `-y configure` execute the `configure` sub-command (of the `bigchaindb` command) inside the container, with the `-y` option to automatically use all the default config values @@ -75,21 +75,37 @@ docker run \ --name=rethinkdb \ --publish=172.17.0.1:28015:28015 \ --publish=172.17.0.1:58080:8080 \ + --restart=always \ + --volume $HOME/bigchaindb_docker:/data \ rethinkdb:2.3 ``` + -You can also access the RethinkDB dashboard at -[http://172.17.0.1:58080/](http://172.17.0.1:58080/) +You can also access the RethinkDB dashboard at http://172.17.0.1:58080/ #### For MongoDB +Note: MongoDB runs as user `mongodb` which has the UID `999` and GID `999` +inside the container. For the volume to be mounted properly as user `mongodb`, +your host should have a `mongodb` user with UID and GID `999`. +If you have another user on the host with UID `999`, the mapped files will +be owned by this user in the host. +If there is no user with UID 999, you can create the corresponding user and +group. + +`useradd -r --uid 999 mongodb` OR `groupadd -r --gid 999 mongodb && useradd -r --uid 999 -g mongodb mongodb` should work. + + ```text docker run \ --detach \ --name=mongodb \ --publish=172.17.0.1:27017:27017 \ + --restart=always \ + --volume=/tmp/mongodb_docker/db:/data/db \ + --volume=/tmp/mongodb_docker/configdb:/data/configdb \ mongo:3.4.1 --replSet=bigchain-rs ``` @@ -100,6 +116,7 @@ docker run \ --detach \ --name=bigchaindb \ --publish=59984:9984 \ + --restart=always \ --volume=$HOME/bigchaindb_docker:/data \ bigchaindb/bigchaindb \ start @@ -123,38 +140,6 @@ machine running the Docker engine. If you are running docker-machine (e.g. on Mac OS X) this will be the IP of the Docker machine (`docker-machine ip machine_name`). -### Load Testing with Docker - -Now that we have BigchainDB running in the Docker container named `bigchaindb`, we can -start another BigchainDB container to generate a load test for it. - -First, make sure the container named `bigchaindb` is still running. You can check that using: -```text -docker ps -``` - -You should see a container named `bigchaindb` in the list.
- -You can load test the BigchainDB running in that container by running the `bigchaindb load` command in a second container: - -```text -docker run \ - --env BIGCHAINDB_DATABASE_HOST=bigchaindb \ - --link bigchaindb \ - --rm \ - --volume "$HOME/bigchaindb_docker:/data" \ - bigchaindb/bigchaindb \ - load -``` - -Note the `--link` option to link to the first container (named `bigchaindb`). - -Aside: The `bigchaindb load` command has several options (e.g. `-m`). You can read more about it in [the documentation about the BigchainDB command line interface](../server-reference/bigchaindb-cli.html). - -If you look at the RethinkDB dashboard (in your web browser), you should see the effects of the load test. You can also see some effects in the Docker logs using: -```text -docker logs -f bigchaindb -``` ## Building Your Own Image @@ -171,3 +156,4 @@ docker build --tag local-bigchaindb . ``` Now you can use your own image to run BigchainDB containers. + diff --git a/docs/server/source/cloud-deployment-templates/add-node-on-kubernetes.rst b/docs/server/source/cloud-deployment-templates/add-node-on-kubernetes.rst new file mode 100644 index 00000000..7dcf1104 --- /dev/null +++ b/docs/server/source/cloud-deployment-templates/add-node-on-kubernetes.rst @@ -0,0 +1,178 @@ +Kubernetes Template: Add a BigchainDB Node to an Existing BigchainDB Cluster +============================================================================ + +This page describes how to deploy a BigchainDB node using Kubernetes, +and how to add that node to an existing BigchainDB cluster. +It assumes you already have a running Kubernetes cluster +where you can deploy the new BigchainDB node. + +If you want to deploy the first BigchainDB node in a BigchainDB cluster, +or a stand-alone BigchainDB node, +then see :doc:`the page about that `. + + +Terminology Used +---------------- + +``existing cluster`` will refer to one of the existing Kubernetes clusters +hosting one of the existing BigchainDB nodes. + +``ctx-1`` will refer to the kubectl context of the existing cluster. + +``new cluster`` will refer to the new Kubernetes cluster that will run a new +BigchainDB node (including a BigchainDB instance and a MongoDB instance). + +``ctx-2`` will refer to the kubectl context of the new cluster. + +``new MongoDB instance`` will refer to the MongoDB instance in the new cluster. + +``existing MongoDB instance`` will refer to the MongoDB instance in the +existing cluster. + +``new BigchainDB instance`` will refer to the BigchainDB instance in the new +cluster. + +``existing BigchainDB instance`` will refer to the BigchainDB instance in the +existing cluster. + + +Step 1: Prerequisites +--------------------- + +* A public/private key pair for the new BigchainDB instance. + +* The public key should be shared offline with the other existing BigchainDB + nodes in the existing BigchainDB cluster. + +* You will need the public keys of all the existing BigchainDB nodes. + +* A new Kubernetes cluster set up with kubectl configured to access it. + +* Some familiarity with deploying a BigchainDB node on Kubernetes. + See our :doc:`other docs about that `. + +Note: If you are managing multiple Kubernetes clusters from your local +system, you can run ``kubectl config view`` to list all the contexts that +are available for the local kubectl. +To target a specific cluster, add a ``--context`` flag to the kubectl CLI. For +example: + +..
code:: bash + + $ kubectl --context ctx-1 apply -f example.yaml + $ kubectl --context ctx-2 apply -f example.yaml + $ kubectl --context ctx-1 proxy --port 8001 + $ kubectl --context ctx-2 proxy --port 8002 + + +Step 2: Prepare the New Kubernetes Cluster +------------------------------------------ + +Follow the steps in the sections to set up Storage Classes and Persistent Volume +Claims, and to run MongoDB in the new cluster: + +1. :ref:`Add Storage Classes ` +2. :ref:`Add Persistent Volume Claims ` +3. :ref:`Create the Config Map ` +4. :ref:`Run MongoDB instance ` + + +Step 3: Add the New MongoDB Instance to the Existing Replica Set +---------------------------------------------------------------- + +Note that by ``replica set``, we are referring to the MongoDB replica set, +not a Kubernetes ``ReplicaSet``. + +If you are not the administrator of an existing BigchainDB node, you +will have to coordinate offline with an existing administrator so that they can +add the new MongoDB instance to the replica set. + +Add the new instance of MongoDB from an existing instance by accessing the +``mongo`` shell. + +.. code:: bash + + $ kubectl --context ctx-1 exec -it mdb-0 -c mongodb -- /bin/bash + root@mdb-0# mongo --port 27017 + +One can only add members to a replica set from the ``PRIMARY`` instance. +The ``mongo`` shell prompt should state that this is the primary member in the +replica set. +If not, then you can use the ``rs.status()`` command to find out who the +primary is and log in to the ``mongo`` shell on the primary. + +Run the ``rs.add()`` command with the FQDN and port number of the new instance: + +.. code:: bash + + PRIMARY> rs.add("<fqdn>:<port>") + + +Step 4: Verify the Replica Set Membership +----------------------------------------- + +You can use the ``rs.conf()`` and the ``rs.status()`` commands available in the +mongo shell to verify the replica set membership. + +The new MongoDB instance should be listed in the membership information +displayed. + + +Step 5: Start the New BigchainDB Instance +----------------------------------------- + +Get the file ``bigchaindb-dep.yaml`` from GitHub using: + +.. code:: bash + + $ wget https://raw.githubusercontent.com/bigchaindb/bigchaindb/master/k8s/bigchaindb/bigchaindb-dep.yaml + +Note that we set the ``BIGCHAINDB_DATABASE_HOST`` to ``mdb`` which is the name +of the MongoDB service defined earlier. + +Edit the ``BIGCHAINDB_KEYPAIR_PUBLIC`` with the public key of this instance, +the ``BIGCHAINDB_KEYPAIR_PRIVATE`` with the private key of this instance and +the ``BIGCHAINDB_KEYRING`` with a ``:`` delimited list of all the public keys +in the BigchainDB cluster. + +Create the required Deployment using: + +.. code:: bash + + $ kubectl --context ctx-2 apply -f bigchaindb-dep.yaml + +You can check its status using the command ``kubectl get deploy -w`` + + +Step 6: Restart the Existing BigchainDB Instance(s) +--------------------------------------------------- + +Add the public key of the new BigchainDB instance to the keyring of all the +existing BigchainDB instances and update the BigchainDB instances using: + +.. code:: bash + + $ kubectl --context ctx-1 replace -f bigchaindb-dep.yaml + +This will create a "rolling deployment" in Kubernetes where a new instance of +BigchainDB will be created, and if the health check on the new instance is +successful, the earlier one will be terminated. This ensures that there is +zero downtime during updates.
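If you want to watch the rolling update as it proceeds, kubectl has a ``rollout`` subcommand. A minimal sketch, assuming the Deployment is named ``bdb`` (an assumption; the actual name comes from your ``bigchaindb-dep.yaml``):

.. code:: bash

   $ kubectl --context ctx-1 rollout status deployment/bdb

The command blocks until the rollout either completes or fails, which makes it handy at the end of a scripted upgrade.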
+ +You can SSH to an existing BigchainDB instance and run the ``bigchaindb +show-config`` command to check that the keyring is updated. + + +Step 7: Run NGINX as a Deployment +--------------------------------- + +Please refer to :ref:`this ` to +set up NGINX in your new node. + + +Step 8: Test Your New BigchainDB Node +------------------------------------- + +Please refer to the testing steps :ref:`here ` to verify that your new BigchainDB node is working as expected. + diff --git a/docs/server/source/cloud-deployment-templates/first-node.rst b/docs/server/source/cloud-deployment-templates/first-node.rst new file mode 100644 index 00000000..9130696a --- /dev/null +++ b/docs/server/source/cloud-deployment-templates/first-node.rst @@ -0,0 +1,454 @@ +First Node or Bootstrap Node Setup +================================== + +This document is a work in progress and will evolve over time to include +security, websocket and other settings. + +Step 1: Set Up the Cluster +-------------------------- + + .. code:: bash + + az group create --name bdb-test-cluster-0 --location westeurope --debug --output json + + ssh-keygen -t rsa -C "k8s-bdb-test-cluster-0" -f ~/.ssh/k8s-bdb-test-cluster-0 + + az acs create --name k8s-bdb-test-cluster-0 \ + --resource-group bdb-test-cluster-0 \ + --master-count 3 \ + --agent-count 2 \ + --admin-username ubuntu \ + --agent-vm-size Standard_D2_v2 \ + --dns-prefix k8s-bdb-test-cluster-0 \ + --ssh-key-value ~/.ssh/k8s-bdb-test-cluster-0.pub \ + --orchestrator-type kubernetes \ + --debug --output json + + az acs kubernetes get-credentials \ + --resource-group bdb-test-cluster-0 \ + --name k8s-bdb-test-cluster-0 \ + --debug --output json + + echo -e "Host k8s-bdb-test-cluster-0.westeurope.cloudapp.azure.com\n ForwardAgent yes" >> ~/.ssh/config + + +Step 2: Connect to the Cluster UI - (optional) +---------------------------------------------- + + * Get the kubectl context for this cluster using ``kubectl config view``. + + * For the above commands, the context would be ``k8s-bdb-test-cluster-0``. + + .. code:: bash + + kubectl --context k8s-bdb-test-cluster-0 proxy -p 8001 + +Step 3. Configure the Cluster +----------------------------- + + * Use the ConfigMap in the ``configuration/config-map.yaml`` file for configuring + the cluster. + + * Log in to the MongoDB Cloud Manager and select the group that will monitor + and back up this cluster from the dropdown box. + + * Go to Settings, Group Settings and copy the ``Agent Api Key``. + + * Replace the ``<api key>`` field with this key. + + * Since this is the first node of the cluster, ensure that the ``data.fqdn`` + field has the value ``mdb-instance-0``. + + * We only support the value ``all`` in the ``data.allowed-hosts`` field for now. + + * Create the ConfigMap + + .. code:: bash + + kubectl --context k8s-bdb-test-cluster-0 apply -f configuration/config-map.yaml + +Step 4. Start the NGINX Service +------------------------------- + + * This will give us a public IP for the cluster. + + * Once you complete this step, you might need to wait up to 10 mins for the + public IP to be assigned. + + * You have the option to use vanilla NGINX or an OpenResty NGINX integrated + with 3scale API Gateway. + + +Step 4.1. Vanilla NGINX +^^^^^^^^^^^^^^^^^^^^^^^ + + * This configuration is located in the file ``nginx/nginx-svc.yaml``. + + * Since this is the first node, rename ``metadata.name`` and ``metadata.labels.name`` + to ``ngx-instance-0``, and ``spec.selector.app`` to ``ngx-instance-0-dep``. + + * Start the Kubernetes Service: + ..
code:: bash + + kubectl --context k8s-bdb-test-cluster-0 apply -f nginx/nginx-svc.yaml + + +Step 4.2. OpenResty NGINX + 3scale +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + * You have to enable HTTPS for this one and will need an HTTPS certificate + for your domain. + + * Assuming that the public key chain is named ``cert.pem`` and the private key is + ``cert.key``, run the following commands to encode the certificates into a + single continuous string that can be embedded in yaml. + + .. code:: bash + + cat cert.pem | base64 -w 0 > cert.pem.b64 + + cat cert.key | base64 -w 0 > cert.key.b64 + + + * Copy the contents of ``cert.pem.b64`` in the ``cert.pem`` field, and the + contents of ``cert.key.b64`` in the ``cert.key`` field in the file + ``nginx-3scale/nginx-3scale-secret.yaml`` + + * Create the Kubernetes Secret: + + .. code:: bash + + kubectl --context k8s-bdb-test-cluster-0 apply -f nginx-3scale/nginx-3scale-secret.yaml + + * Since this is the first node, rename ``metadata.name`` and ``metadata.labels.name`` + to ``ngx-instance-0``, and ``spec.selector.app`` to ``ngx-instance-0-dep`` in + the ``nginx-3scale/nginx-3scale-svc.yaml`` file. + + * Start the Kubernetes Service: + + .. code:: bash + + kubectl --context k8s-bdb-test-cluster-0 apply -f nginx-3scale/nginx-3scale-svc.yaml + + +Step 5. Assign DNS Name to the NGINX Public IP +---------------------------------------------- + + * The following command can help you find out if the nginx service started above + has been assigned a public IP or external IP address: + + .. code:: bash + + kubectl --context k8s-bdb-test-cluster-0 get svc -w + + * Once a public IP is assigned, you can log in to the Azure portal and map it to + a DNS name. + + * We usually start with bdb-test-cluster-0, bdb-test-cluster-1 and so on. + + * Let us assume that we assigned the unique name of ``bdb-test-cluster-0`` here. + + +Step 6. Start the Mongo Kubernetes Service +------------------------------------------ + + * Change ``metadata.name`` and ``metadata.labels.name`` to + ``mdb-instance-0``, and ``spec.selector.app`` to ``mdb-instance-0-ss``. + + .. code:: bash + + kubectl --context k8s-bdb-test-cluster-0 apply -f mongodb/mongo-svc.yaml + + +Step 7. Start the BigchainDB Kubernetes Service +----------------------------------------------- + + * Change ``metadata.name`` and ``metadata.labels.name`` to + ``bdb-instance-0``, and ``spec.selector.app`` to ``bdb-instance-0-dep``. + + .. code:: bash + + kubectl --context k8s-bdb-test-cluster-0 apply -f bigchaindb/bigchaindb-svc.yaml + + +Step 8. Start the NGINX Kubernetes Deployment +--------------------------------------------- + + * As in step 4, you have the option to use vanilla NGINX or an OpenResty NGINX + integrated with 3scale API Gateway. + +Step 8.1. Vanilla NGINX +^^^^^^^^^^^^^^^^^^^^^^^ + + * This configuration is located in the file ``nginx/nginx-dep.yaml``. + + * Since this is the first node, change the ``metadata.name`` and + ``spec.template.metadata.labels.app`` to ``ngx-instance-0-dep``. + + * Set ``MONGODB_BACKEND_HOST`` env var to + ``mdb-instance-0.default.svc.cluster.local``. + + * Set ``BIGCHAINDB_BACKEND_HOST`` env var to + ``bdb-instance-0.default.svc.cluster.local``. + + * Set ``MONGODB_FRONTEND_PORT`` to + ``$(NGX_INSTANCE_0_SERVICE_PORT_NGX_PUBLIC_MDB_PORT)``. + + * Set ``BIGCHAINDB_FRONTEND_PORT`` to + ``$(NGX_INSTANCE_0_SERVICE_PORT_NGX_PUBLIC_BDB_PORT)``. + + .. code:: bash + + kubectl --context k8s-bdb-test-cluster-0 apply -f nginx/nginx-dep.yaml + +Step 8.2.
OpenResty NGINX + 3scale +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + * This configuration is located in the file + ``nginx-3scale/nginx-3scale-dep.yaml``. + + * Since this is the first node, change the ``metadata.name`` and + ``spec.template.metadata.labels.app`` to ``ngx-instance-0-dep``. + + * Set ``MONGODB_BACKEND_HOST`` env var to + ``mdb-instance-0.default.svc.cluster.local``. + + * Set ``BIGCHAINDB_BACKEND_HOST`` env var to + ``bdb-instance-0.default.svc.cluster.local``. + + * Set ``MONGODB_FRONTEND_PORT`` to + ``$(NGX_INSTANCE_0_SERVICE_PORT_NGX_PUBLIC_MDB_PORT)``. + + * Set ``BIGCHAINDB_FRONTEND_PORT`` to + ``$(NGX_INSTANCE_0_SERVICE_PORT_NGX_PUBLIC_BDB_PORT)``. + + * Also, replace the placeholder strings for the env vars with the values + obtained from 3scale. You will need the Secret Token, Service ID, Version Header + and Provider Key from 3scale. + + * The ``THREESCALE_FRONTEND_API_DNS_NAME`` will be the DNS name registered for your + HTTPS certificate. + + * You can set the ``THREESCALE_UPSTREAM_API_PORT`` to any port other than 9984, + 9985, 443, 8888 and 27017. We usually use port ``9999``. + + .. code:: bash + + kubectl --context k8s-bdb-test-cluster-0 apply -f nginx-3scale/nginx-3scale-dep.yaml + + +Step 9. Create a Kubernetes Storage Class for MongoDB +----------------------------------------------------- + + .. code:: bash + + kubectl --context k8s-bdb-test-cluster-0 apply -f mongodb/mongo-sc.yaml + + +Step 10. Create a Kubernetes PersistentVolumeClaim +-------------------------------------------------- + + .. code:: bash + + kubectl --context k8s-bdb-test-cluster-0 apply -f mongodb/mongo-pvc.yaml + + +Step 11. Start a Kubernetes StatefulSet for MongoDB +--------------------------------------------------- + + * Change ``spec.serviceName`` to ``mdb-instance-0``. + + * Change the ``metadata.name``, ``template.metadata.name`` and + ``template.metadata.labels.app`` to ``mdb-instance-0-ss``. + + * It might take up to 10 minutes for the disks to be created and attached to + the pod. + + * The UI might show that the pod has errored with the + message "timeout expired waiting for volumes to attach/mount". + + * Use the CLI below to check the status of the pod in this case, + instead of the UI. This happens due to a bug in Azure ACS. + + .. code:: bash + + kubectl --context k8s-bdb-test-cluster-0 apply -f mongodb/mongo-ss.yaml + + * You can check the status of the pod using the command: + + .. code:: bash + + kubectl --context k8s-bdb-test-cluster-0 get po -w + + +Step 12. Start a Kubernetes Deployment for BigchainDB +----------------------------------------------------- + + * Change both ``metadata.name`` and ``spec.template.metadata.labels.app`` + to ``bdb-instance-0-dep``. + + * Set ``BIGCHAINDB_DATABASE_HOST`` to ``mdb-instance-0``. + + * Set the appropriate ``BIGCHAINDB_KEYPAIR_PUBLIC``, + ``BIGCHAINDB_KEYPAIR_PRIVATE`` values. + + * One way to generate a BigchainDB keypair is to run a Python shell with + the command + ``from bigchaindb_driver import crypto; crypto.generate_keypair()``. + + .. code:: bash + + kubectl --context k8s-bdb-test-cluster-0 apply -f bigchaindb/bigchaindb-dep.yaml + + +Step 13. Start a Kubernetes Deployment for MongoDB Monitoring Agent +------------------------------------------------------------------- + + * Change both ``metadata.name`` and ``spec.template.metadata.labels.app`` to + ``mdb-mon-instance-0-dep``. + + .. code:: bash + + kubectl --context k8s-bdb-test-cluster-0 apply -f mongodb-monitoring-agent/mongo-mon-dep.yaml + + * Get the pod name and check its logs: + + ..
code:: bash + + kubectl --context k8s-bdb-test-cluster-0 get po + + kubectl --context k8s-bdb-test-cluster-0 logs -f <pod name> + + +Step 14. Configure MongoDB Cloud Manager for Monitoring +------------------------------------------------------- + + * Open `MongoDB Cloud Manager `_. + + * Click ``Login`` under ``MongoDB Cloud Manager`` and log in to the Cloud Manager. + + * Select the group from the dropdown box on the page. + + * Go to Settings, Group Settings and add a Preferred Hostnames regexp as + ``^mdb-instance-[0-9]{1,2}$``. It may take up to 5 minutes until this setting + takes effect. You may refresh the browser window and verify whether the changes + have been saved or not. + + * Next, click the ``Deployment`` tab, and then the ``Manage Existing`` button. + + * On the ``Import your deployment for monitoring`` page, enter the hostname as + ``mdb-instance-0``, port number as ``27017``, with no authentication and no + TLS/SSL settings. + + * Once the deployment is found, click the ``Continue`` button. + This may take about a minute or two. + + * Do not add ``Automation Agent`` when given an option to add it. + + * Verify on the UI that data is being sent by the monitoring agent. + + +Step 15. Start a Kubernetes Deployment for MongoDB Backup Agent +--------------------------------------------------------------- + + * Change both ``metadata.name`` and ``spec.template.metadata.labels.app`` + to ``mdb-backup-instance-0-dep``. + + .. code:: bash + + kubectl --context k8s-bdb-test-cluster-0 apply -f mongodb-backup-agent/mongo-backup-dep.yaml + + * Get the pod name and check its logs: + + .. code:: bash + + kubectl --context k8s-bdb-test-cluster-0 get po + + kubectl --context k8s-bdb-test-cluster-0 logs -f <pod name> + + +Step 16. Configure MongoDB Cloud Manager for Backup +--------------------------------------------------- + + * Open `MongoDB Cloud Manager `_. + + * Click ``Login`` under ``MongoDB Cloud Manager`` and log in to the Cloud + Manager. + + * Select the group from the dropdown box on the page. + + * Click the ``Backup`` tab. + + * Click on the ``Begin Setup`` button. + + * Click on ``Next``, select the replica set from the dropdown menu. + + * Verify the details of your MongoDB instance and click on ``Start`` again. + + * It might take up to 5 minutes to start the backup process. + + * Verify that data is being backed up on the UI. + + +Step 17. Verify that the Cluster is Correctly Set Up +---------------------------------------------------- + + * Start the toolbox container in the cluster: + + .. code:: bash + + kubectl --context k8s-bdb-test-cluster-0 \ + run -it toolbox \ + --image bigchaindb/toolbox \ + --image-pull-policy=Always \ + --restart=Never --rm + + * Verify MongoDB instance + + .. code:: bash + + nslookup mdb-instance-0 + + dig +noall +answer _mdb-port._tcp.mdb-instance-0.default.svc.cluster.local SRV + + curl -X GET http://mdb-instance-0:27017 + + * Verify BigchainDB instance + + .. code:: bash + + nslookup bdb-instance-0 + + dig +noall +answer _bdb-port._tcp.bdb-instance-0.default.svc.cluster.local SRV + + curl -X GET http://bdb-instance-0:9984 + + * Verify NGINX instance + + .. code:: bash + + nslookup ngx-instance-0 + + dig +noall +answer _ngx-public-mdb-port._tcp.ngx-instance-0.default.svc.cluster.local SRV + + curl -X GET http://ngx-instance-0:27017 # results in curl: (56) Recv failure: Connection reset by peer + + dig +noall +answer _ngx-public-bdb-port._tcp.ngx-instance-0.default.svc.cluster.local SRV + + * If you have run the vanilla NGINX instance, run + ..
code:: bash + + curl -X GET http://ngx-instance-0:80 + + * If you have the OpenResty NGINX + 3scale instance, run + + .. code:: bash + + curl -X GET https://ngx-instance-0 + + * Check the MongoDB monitoring and backup agents on the MongoDB Cloud Manager portal to verify they are working fine. + + * Send some transactions to BigchainDB and verify it's up and running! + diff --git a/docs/server/source/cloud-deployment-templates/index.rst b/docs/server/source/cloud-deployment-templates/index.rst index 67a2ace4..d5b60a0e 100644 --- a/docs/server/source/cloud-deployment-templates/index.rst +++ b/docs/server/source/cloud-deployment-templates/index.rst @@ -5,7 +5,7 @@ We have some "templates" to deploy a basic, working, but bare-bones BigchainDB n You don't have to use the tools we use in the templates. You can use whatever tools you prefer. -If you find the cloud deployment templates for nodes helpful, then you may also be interested in our scripts for :doc:`deploying a testing cluster on AWS <../clusters-feds/aws-testing-cluster>` (documented in the Clusters & Federations section). +If you find the cloud deployment templates for nodes helpful, then you may also be interested in our scripts for :doc:`deploying a testing cluster on AWS <../clusters-feds/aws-testing-cluster>` (documented in the Clusters section). .. toctree:: :maxdepth: 1 @@ -15,4 +15,6 @@ If you find the cloud deployment templates for nodes helpful, then you may also azure-quickstart-template template-kubernetes-azure node-on-kubernetes - \ No newline at end of file + add-node-on-kubernetes + upgrade-on-kubernetes + first-node diff --git a/docs/server/source/cloud-deployment-templates/node-on-kubernetes.rst b/docs/server/source/cloud-deployment-templates/node-on-kubernetes.rst index 03ffb2fe..8c38e384 100644 --- a/docs/server/source/cloud-deployment-templates/node-on-kubernetes.rst +++ b/docs/server/source/cloud-deployment-templates/node-on-kubernetes.rst @@ -1,9 +1,13 @@ -Run a BigchainDB Node in a Kubernetes Cluster -============================================= +Kubernetes Template: Deploy a Single BigchainDB Node +==================================================== -Assuming you already have a `Kubernetes `_ -cluster up and running, this page describes how to run a -BigchainDB node in it. +This page describes how to deploy the first BigchainDB node +in a BigchainDB cluster, or a stand-alone BigchainDB node, +using `Kubernetes `_. +It assumes you already have a running Kubernetes cluster. + +If you want to add a new BigchainDB node to an existing BigchainDB cluster, +refer to :doc:`the page about that `. Step 1: Install kubectl @@ -21,7 +25,7 @@ Step 2: Configure kubectl The default location of the kubectl configuration file is ``~/.kube/config``. If you don't have that file, then you need to get it. -If you deployed your Kubernetes cluster on Azure +**Azure.** If you deployed your Kubernetes cluster on Azure using the Azure CLI 2.0 (as per :doc:`our template `), then you can get the ``~/.kube/config`` file using: @@ -31,16 +35,442 @@ then you can get the ``~/.kube/config`` file using: --resource-group <resource group name> \ --name <cluster name> +If it asks for a password (to unlock the SSH key) +and you enter the correct password, +but you get an error message, +then try adding ``--ssh-key-file ~/.ssh/<name>`` +to the above command (i.e. the path to the private key).
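Once the ``~/.kube/config`` file is in place, it's a good idea to confirm that kubectl can actually talk to the cluster before going further. A quick sanity check (a sketch; the output depends on your cluster):

.. code:: bash

   $ kubectl config current-context
   $ kubectl get nodes

If ``kubectl get nodes`` lists your master and agent nodes with status ``Ready``, kubectl is configured correctly.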
-Step 3: Run a MongoDB Container -------------------------------- -To start a MongoDB Docker container in a pod on one of the cluster nodes: +Step 3: Create Storage Classes +------------------------------ + +MongoDB needs somewhere to store its data persistently, +outside the container where MongoDB is running. +Our MongoDB Docker container +(based on the official MongoDB Docker container) +exports two volume mounts with correct +permissions from inside the container: + +* The directory where the mongod instance stores its data: ``/data/db``. + There's more explanation in the MongoDB docs about `storage.dbpath `_. + +* The directory where the mongodb instance stores the metadata for a sharded + cluster: ``/data/configdb/``. + There's more explanation in the MongoDB docs about `sharding.configDB `_. + +Explaining how Kubernetes handles persistent volumes, +and the associated terminology, +is beyond the scope of this documentation; +see `the Kubernetes docs about persistent volumes +`_. + +The first thing to do is create the Kubernetes storage classes. + +**Azure.** First, you need an Azure storage account. +If you deployed your Kubernetes cluster on Azure +using the Azure CLI 2.0 +(as per :doc:`our template `), +then the `az acs create` command already created two +storage accounts in the same location and resource group +as your Kubernetes cluster. +Both should have the same "storage account SKU": ``Standard_LRS``. +Standard storage is lower-cost and lower-performance. +It uses hard disk drives (HDD). +LRS means locally-redundant storage: three replicas +in the same data center. +Premium storage is higher-cost and higher-performance. +It uses solid state drives (SSD). +At the time of writing, +when we created a storage account with SKU ``Premium_LRS`` +and tried to use that, +the PersistentVolumeClaim would get stuck in a "Pending" state. +For future reference, the command to create a storage account is +`az storage account create `_. + + +Get the file ``mongo-sc.yaml`` from GitHub using: .. code:: bash - $ kubectl ????? + $ wget https://raw.githubusercontent.com/bigchaindb/bigchaindb/master/k8s/mongodb/mongo-sc.yaml + +You may have to update the ``parameters.location`` field in both the files to +specify the location you are using in Azure. + +Create the required storage classes using: + +.. code:: bash + + $ kubectl apply -f mongo-sc.yaml -Note: The BigchainDB Dashboard can be deployed -as a Docker container, like everything else. +You can check if it worked using ``kubectl get storageclasses``. + +**Azure.** Note that there is no line of the form +``storageAccount: `` +under ``parameters:``. When we included one +and then created a PersistentVolumeClaim based on it, +the PersistentVolumeClaim would get stuck +in a "Pending" state. +Kubernetes just looks for a storageAccount +with the specified skuName and location. + + +Step 4: Create Persistent Volume Claims +--------------------------------------- + +Next, you will create two PersistentVolumeClaim objects ``mongo-db-claim`` and +``mongo-configdb-claim``. +Get the file ``mongo-pvc.yaml`` from GitHub using: + +.. code:: bash + + $ wget https://raw.githubusercontent.com/bigchaindb/bigchaindb/master/k8s/mongodb/mongo-pvc.yaml + +Note how there's no explicit mention of Azure, AWS or whatever. +``ReadWriteOnce`` (RWO) means the volume can be mounted as +read-write by a single Kubernetes node. +(``ReadWriteOnce`` is the *only* access mode supported +by AzureDisk.) +``storage: 20Gi`` means the volume has a size of 20 +`gibibytes `_. 
+ +You may want to update the ``spec.resources.requests.storage`` field in both +the files to specify a different disk size. + +Create the required Persistent Volume Claims using: + +.. code:: bash + + $ kubectl apply -f mongo-pvc.yaml + + +You can check its status using: ``kubectl get pvc -w`` + +Initially, the status of persistent volume claims might be "Pending" +but it should become "Bound" fairly quickly. + + +Step 5: Create the Config Map - Optional +---------------------------------------- + +This step is required only if you are planning to set up multiple +`BigchainDB nodes +`_. + +MongoDB reads the local ``/etc/hosts`` file while bootstrapping a replica set +to resolve the hostname provided to the ``rs.initiate()`` command. It needs to +ensure that the replica set is being initialized in the same instance where +the MongoDB instance is running. + +To achieve this, you will create a ConfigMap with the FQDN of the MongoDB instance +and populate the ``/etc/hosts`` file with this value so that a replica set can +be created seamlessly. + +Get the file ``mongo-cm.yaml`` from GitHub using: + +.. code:: bash + + $ wget https://raw.githubusercontent.com/bigchaindb/bigchaindb/master/k8s/mongodb/mongo-cm.yaml + +You may want to update the ``data.fqdn`` field in the file before creating the +ConfigMap. The ``data.fqdn`` field will be the DNS name of your MongoDB instance. +This will be used by other MongoDB instances when forming a MongoDB +replica set. It should resolve to the MongoDB instance in your cluster when +you are done with the setup. This will help when you are adding more MongoDB +instances to the replica set in the future. + + +**Azure.** +In Kubernetes on ACS, the name you populate in the ``data.fqdn`` field +will be used to configure a DNS name for the public IP assigned to the +Kubernetes Service that is the frontend for the MongoDB instance. +We suggest using a name that will already be available in Azure. +We use ``mdb-instance-0``, ``mdb-instance-1`` and so on in this document, +which gives us ``mdb-instance-0.<azure location>.cloudapp.azure.com``, +``mdb-instance-1.<azure location>.cloudapp.azure.com``, etc. as the FQDNs. +The ``<azure location>`` is the Azure datacenter location you are using, +which can also be obtained using the ``az account list-locations`` command. +You can also try to assign a name to a Public IP in Azure before starting +the process, or use ``nslookup`` with the name you have in mind to check +if it's available for use. + +You should ensure that the name specified in the ``data.fqdn`` field is +a unique one. + +**Kubernetes on bare-metal or other cloud providers.** +You need to provide the name resolution function +by other means (using DNS providers like GoDaddy, CloudFlare or your own +private DNS server). The DNS setup for other environments is currently +beyond the scope of this document. + + +Create the required ConfigMap using: + +.. code:: bash + + $ kubectl apply -f mongo-cm.yaml + + +You can check its status using: ``kubectl get cm`` + +Now you are ready to run MongoDB and BigchainDB on your Kubernetes cluster. + + +Step 6: Run MongoDB as a StatefulSet +------------------------------------ + +Get the file ``mongo-ss.yaml`` from GitHub using: + +.. code:: bash + + $ wget https://raw.githubusercontent.com/bigchaindb/bigchaindb/master/k8s/mongodb/mongo-ss.yaml + + +Note how the MongoDB container uses the ``mongo-db-claim`` and the +``mongo-configdb-claim`` PersistentVolumeClaims for its ``/data/db`` and +``/data/configdb`` directories (mount path).
Note also that we use the pod's +``securityContext.capabilities.add`` specification to add the ``FOWNER`` +capability to the container. +That is because the MongoDB container has the user ``mongodb``, with uid ``999``, +and group ``mongodb``, with gid ``999``. +When this container runs on a host with a mounted disk, the writes fail when +there is no user with uid ``999``. +To avoid this, we use the Docker feature of ``--cap-add=FOWNER``. +This bypasses the uid and gid permission checks during writes and allows data +to be persisted to disk. +Refer to the +`Docker docs `_ +for details. + +As we gain more experience running MongoDB in testing and production, we will +tweak the ``resources.limits.cpu`` and ``resources.limits.memory``. +We will also stop exposing port ``27017`` globally and/or allow only certain +hosts to connect to the MongoDB instance in the future. + +Create the required StatefulSet using: + +.. code:: bash + + $ kubectl apply -f mongo-ss.yaml + +You can check its status using the commands ``kubectl get statefulsets -w`` +and ``kubectl get svc -w`` + +You may have to wait for up to 10 minutes for the disk to be created +and attached on the first run. The pod can fail several times with the message +saying that the timeout for mounting the disk was exceeded. + + +Step 7: Initialize a MongoDB Replica Set - Optional +--------------------------------------------------- + +This step is required only if you are planning to set up multiple +`BigchainDB nodes +`_. + + +Log in to the running MongoDB instance and access the mongo shell using: + +.. code:: bash + + $ kubectl exec -it mdb-0 -c mongodb -- /bin/bash + root@mdb-0:/# mongo --port 27017 + +You will initiate the replica set by using the ``rs.initiate()`` command from the +mongo shell. Its syntax is: + +.. code:: bash + + rs.initiate({ + _id : "<replica set name>", + members: [ { + _id : 0, + host : "<fqdn>:<port>" + } ] + }) + +An example command might look like: + +.. code:: bash + + > rs.initiate({ _id : "bigchain-rs", members: [ { _id : 0, host :"mdb-instance-0.westeurope.cloudapp.azure.com:27017" } ] }) + + +where ``mdb-instance-0.westeurope.cloudapp.azure.com`` is the value stored in +the ``data.fqdn`` field in the ConfigMap created using ``mongo-cm.yaml``. + + +You should see changes in the mongo shell prompt from ``>`` +to ``bigchain-rs:OTHER>`` to ``bigchain-rs:SECONDARY>`` and finally +to ``bigchain-rs:PRIMARY>``. + +You can use the ``rs.conf()`` and the ``rs.status()`` commands to check the +detailed replica set configuration now. + + +Step 8: Create a DNS record - Optional +-------------------------------------- + +This step is required only if you are planning to set up multiple +`BigchainDB nodes +`_. + +**Azure.** Select the current Azure resource group and look for the ``Public IP`` +resource. You should see at least 2 entries there - one for the Kubernetes +master and the other for the MongoDB instance. You may have to ``Refresh`` the +Azure web page listing the resources in a resource group for the latest +changes to be reflected. +Select the ``Public IP`` resource that is attached to your service (it should +have the Kubernetes cluster name along with a random string), +select ``Configuration``, add the DNS name that was added in the +ConfigMap earlier, click ``Save``, and wait for the changes to be applied. + +To verify the DNS setting is operational, you can run ``nslookup <dns name added in ConfigMap>`` from your local Linux shell. + +This will ensure that when you scale the replica set later, other MongoDB +members in the replica set can reach this instance.
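For example, if you used the FQDN from the ``rs.initiate()`` example above (an assumption; substitute your own DNS name), you could check both name resolution and MongoDB reachability from any machine that has the ``mongo`` shell installed:

.. code:: bash

   $ nslookup mdb-instance-0.westeurope.cloudapp.azure.com
   $ mongo --host mdb-instance-0.westeurope.cloudapp.azure.com --port 27017 --eval 'db.runCommand({ping: 1})'

A reply containing ``{ "ok" : 1 }`` from the ``ping`` command means the instance is reachable over its public DNS name.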
+ + +Step 9: Run BigchainDB as a Deployment +-------------------------------------- + +Get the file ``bigchaindb-dep.yaml`` from GitHub using: + +.. code:: bash + + $ wget https://raw.githubusercontent.com/bigchaindb/bigchaindb/master/k8s/bigchaindb/bigchaindb-dep.yaml + +Note that we set the ``BIGCHAINDB_DATABASE_HOST`` to ``mdb-svc`` which is the +name of the MongoDB service defined earlier. + +We also hardcode the ``BIGCHAINDB_KEYPAIR_PUBLIC``, +``BIGCHAINDB_KEYPAIR_PRIVATE`` and ``BIGCHAINDB_KEYRING`` for now. + +As we gain more experience running BigchainDB in testing and production, we +will tweak the ``resources.limits`` values for CPU and memory, and as richer +monitoring and probing becomes available in BigchainDB, we will tweak the +``livenessProbe`` and ``readinessProbe`` parameters. + +We also plan to specify scheduling policies for the BigchainDB deployment so +that we ensure that BigchainDB and MongoDB are running in separate nodes, and +build security around the globally exposed port ``9984``. + +Create the required Deployment using: + +.. code:: bash + + $ kubectl apply -f bigchaindb-dep.yaml + +You can check its status using the command ``kubectl get deploy -w`` + + +Step 10: Run NGINX as a Deployment +---------------------------------- + +NGINX is used as a proxy to both the BigchainDB and MongoDB instances in the +node. +It proxies HTTP requests on port 80 to the BigchainDB backend, and TCP +connections on port 27017 to the MongoDB backend. + +You can also configure a whitelist in NGINX to allow only connections from +other instances in the MongoDB replica set to access the backend MongoDB +instance. + +Get the file ``nginx-cm.yaml`` from GitHub using: + +.. code:: bash + + $ wget https://raw.githubusercontent.com/bigchaindb/bigchaindb/master/k8s/nginx/nginx-cm.yaml + +The IP address whitelist can be explicitly configured in the ``nginx-cm.yaml`` +file. You will need a list of the IP addresses of all the other MongoDB +instances in the cluster. If the MongoDB instances specify a hostname, then this +needs to be resolved to the corresponding IP addresses. If the IP address of +any MongoDB instance changes, we can start a 'rolling upgrade' of NGINX after +updating the corresponding ConfigMap without affecting availability. + + +Create the ConfigMap for the whitelist using: + +.. code:: bash + + $ kubectl apply -f nginx-cm.yaml + +Get the file ``nginx-dep.yaml`` from GitHub using: + +.. code:: bash + + $ wget https://raw.githubusercontent.com/bigchaindb/bigchaindb/master/k8s/nginx/nginx-dep.yaml + +Create the NGINX deployment using: + +.. code:: bash + + $ kubectl apply -f nginx-dep.yaml + + +Step 11: Verify the BigchainDB Node Setup +----------------------------------------- + +Step 11.1: Testing Internally +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Run a container that provides utilities like ``nslookup``, ``curl`` and ``dig`` +on the cluster and query the internal DNS and IP endpoints. + +.. code:: bash + + $ kubectl run -it toolbox --image <docker image> --restart=Never --rm + +There is a generic image based on alpine:3.5 with the required utilities +hosted at Docker Hub under `bigchaindb/toolbox `_. +The corresponding Dockerfile is in the bigchaindb/bigchaindb repository on GitHub, at `https://github.com/bigchaindb/bigchaindb/blob/master/k8s/toolbox/Dockerfile `_. + +You can use it as below to get started immediately: + +.. code:: bash + + $ kubectl run -it toolbox --image bigchaindb/toolbox --restart=Never --rm + +It will drop you to the shell prompt.
+Now you can query for the ``mdb`` and ``bdb`` service details. + +.. code:: bash + + # nslookup mdb-svc + # nslookup bdb-svc + # nslookup ngx-svc + # dig +noall +answer _mdb-port._tcp.mdb-svc.default.svc.cluster.local SRV + # dig +noall +answer _bdb-port._tcp.bdb-svc.default.svc.cluster.local SRV + # dig +noall +answer _ngx-public-mdb-port._tcp.ngx-svc.default.svc.cluster.local SRV + # dig +noall +answer _ngx-public-bdb-port._tcp.ngx-svc.default.svc.cluster.local SRV + # curl -X GET http://mdb-svc:27017 + # curl -X GET http://bdb-svc:9984 + # curl -X GET http://ngx-svc:80 + # curl -X GET http://ngx-svc:27017 + +The ``nslookup`` commands should output the configured IP addresses of the +services in the cluster. + +The ``dig`` commands should return the port numbers configured for the +various services in the cluster. + +Finally, the ``curl`` commands test the availability of the services +themselves. + +Step 11.2: Testing Externally +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Try to access ``<public DNS name>:80`` +in your browser. You should receive a JSON response that shows the BigchainDB +server version, among other things. + +Try to access ``<public DNS name>:27017`` +in your browser. If your IP is in the whitelist, you will receive a message +from the MongoDB instance stating that it doesn't allow HTTP connections to +the port anymore. If your IP is not in the whitelist, your access will be +blocked and you will not see any response from the MongoDB instance. + diff --git a/docs/server/source/cloud-deployment-templates/template-ansible.md b/docs/server/source/cloud-deployment-templates/template-ansible.md index 666ad790..f296a2cf 100644 --- a/docs/server/source/cloud-deployment-templates/template-ansible.md +++ b/docs/server/source/cloud-deployment-templates/template-ansible.md @@ -81,4 +81,4 @@ where, as before, `` must be replaced. ## Next Steps -You could make changes to the Ansible playbook (and the resources it uses) to make the node more production-worthy. See [the section on production node assumptions, components and requirements](../nodes/index.html). +You could make changes to the Ansible playbook (and the resources it uses) to make the node more production-worthy. See [the section on production node assumptions, components and requirements](../production-nodes/index.html). diff --git a/docs/server/source/cloud-deployment-templates/template-kubernetes-azure.rst b/docs/server/source/cloud-deployment-templates/template-kubernetes-azure.rst index ad4a8b04..b967e764 100644 --- a/docs/server/source/cloud-deployment-templates/template-kubernetes-azure.rst +++ b/docs/server/source/cloud-deployment-templates/template-kubernetes-azure.rst @@ -18,7 +18,20 @@ You may find that you have to sign up for a Free Trial subscription first. That's okay: you can have many subscriptions. -Step 2: Deploy an Azure Container Service (ACS) +Step 2: Create an SSH Key Pair +------------------------------ + +You'll want an SSH key pair so you'll be able to SSH +to the virtual machines that you'll deploy in the next step. +(If you already have an SSH key pair, you *could* reuse it, +but it's probably a good idea to make a new SSH key pair +for your Kubernetes VMs and nothing else.) + +See the +:ref:`page about how to generate a key pair for SSH `. + + +Step 3: Deploy an Azure Container Service (ACS) ----------------------------------------------- It's *possible* to deploy an Azure Container Service (ACS) from the `Azure Portal `_ (i.e.
online in your web browser) but it's actually easier to do it using the Azure Command-Line Interface (CLI). -(The Azure Portal will ask you for a public SSH key -and a "service principal," and you'll have to create those -first if they don't exist. The CLI will create them -for you if necessary.) Microsoft has `instructions to install the Azure CLI 2.0 on most common operating systems `_. Do that. +First, update the Azure CLI to the latest version: + +.. code:: bash + + $ az component update + Next, log in to your account using: .. code:: bash @@ -79,11 +94,12 @@ Finally, you can deploy an ACS using something like: $ az acs create --name <cluster name> \ --resource-group <resource group name> \ + --master-count 3 \ --agent-count 3 \ + --admin-username ubuntu \ --agent-vm-size Standard_D2_v2 \ --dns-prefix <dns prefix> \ - --generate-ssh-keys \ - --location <location> \ + --ssh-key-value ~/.ssh/<name>.pub \ --orchestrator-type kubernetes There are more options. For help understanding all the options, use the built-in help: @@ -99,5 +115,87 @@ go to **Resource groups** (with the blue cube icon) and click on the one you created to see all the resources in it. + +Optional: SSH to Your New Kubernetes Cluster Nodes +-------------------------------------------------- + +You can SSH to one of the just-deployed Kubernetes "master" nodes +(virtual machines) using: + +.. code:: bash + + $ ssh -i ~/.ssh/<name> ubuntu@<master node address> + +where you can get the IP address or hostname +of a master node from the Azure Portal. For example: + +.. code:: bash + + $ ssh -i ~/.ssh/mykey123 ubuntu@mydnsprefix.westeurope.cloudapp.azure.com + +.. note:: + + All the master nodes should have the *same* IP address and hostname + (also called the Master FQDN). + +The "agent" nodes shouldn't get public IP addresses or hostnames, +so you can't SSH to them *directly*, +but you can first SSH to the master +and then SSH to an agent from there. +To do that, you could +copy your SSH key pair to the master (a bad idea), +or use SSH agent forwarding (better). +To do the latter, do the following on the machine you used +to SSH to the master: + +.. code:: bash + + $ echo -e "Host <Master FQDN>\n ForwardAgent yes" >> ~/.ssh/config + +To verify that SSH agent forwarding works properly, +SSH to one of the master nodes and do: + +.. code:: bash + + $ echo "$SSH_AUTH_SOCK" + +If you get an empty response, +then SSH agent forwarding hasn't been set up correctly. +If you get a non-empty response, +then SSH agent forwarding should work fine +and you can SSH to one of the agent nodes (from a master) +using something like: + +.. code:: bash + + $ ssh ubuntu@k8s-agent-4AC80E97-0 + +where ``k8s-agent-4AC80E97-0`` is the name +of a Kubernetes agent node in your Kubernetes cluster. +You will have to replace it with the name +of an agent node in your cluster. + + +Optional: Delete the Kubernetes Cluster +--------------------------------------- + +.. code:: bash + + $ az acs delete \ + --name <cluster name> \ + --resource-group <resource group name> + + +Optional: Delete the Resource Group +----------------------------------- + +CAUTION: You might end up deleting resources other than the ACS cluster. + +.. code:: bash + + $ az group delete \ + --name <resource group name> + + Next, you can :doc:`run a BigchainDB node on your new -Kubernetes cluster `. \ No newline at end of file +Kubernetes cluster `.
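For the SSH agent forwarding described above to work, the private key must be loaded into a running ssh-agent on your local machine first. A sketch, assuming your key pair is named ``mykey123`` (an assumption; substitute your own key name):

.. code:: bash

   $ eval "$(ssh-agent -s)"      # start an agent, if one isn't already running
   $ ssh-add ~/.ssh/mykey123     # load the private key into the agent
   $ ssh-add -l                  # list loaded keys to confirm

After that, the ``ssh`` commands above (including the hop from a master node to an agent node) can authenticate via the forwarded agent.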
diff --git a/docs/server/source/cloud-deployment-templates/upgrade-on-kubernetes.rst b/docs/server/source/cloud-deployment-templates/upgrade-on-kubernetes.rst new file mode 100644 index 00000000..ba109fbe --- /dev/null +++ b/docs/server/source/cloud-deployment-templates/upgrade-on-kubernetes.rst @@ -0,0 +1,105 @@ +Kubernetes Template: Upgrade all Software in a BigchainDB Node +============================================================== + +This page outlines how to upgrade all the software associated +with a BigchainDB node running on Kubernetes, +including host operating systems, Docker, Kubernetes, +and BigchainDB-related software. + + +Upgrade Host OS, Docker and Kubernetes +-------------------------------------- + +Some Kubernetes installation & management systems +can do full or partial upgrades of host OSes, Docker, +or Kubernetes, e.g. +`Tectonic `_, +`Rancher `_, +and +`Kubo `_. +Consult the documentation for your system. + +**Azure Container Service (ACS).** +On Dec. 15, 2016, a Microsoft employee +`wrote `_: +"In the coming months we [the Azure Kubernetes team] will be building managed updates in the ACS service." +At the time of writing, managed updates were not yet available, +but you should check the latest +`ACS documentation `_ +to see what's available now. +Also at the time of writing, ACS only supported Ubuntu +as the host (master and agent) operating system. +You can upgrade Ubuntu and Docker on Azure +by SSHing into each of the hosts, +as documented on +:ref:`another page `. + +In general, you can SSH to each host in your Kubernetes Cluster +to update the OS and Docker. + +.. note:: + + Once you are in an SSH session with a host, + the ``docker info`` command is a handy way to determine the + host OS (including version) and the Docker version. + +When you want to upgrade the software on a Kubernetes node, +you should "drain" the node first, +i.e. tell Kubernetes to gracefully terminate all pods +on the node and mark it as unschedulable +(so no new pods get put on the node during its downtime). + +.. code:: + + kubectl drain $NODENAME + +There are `more details in the Kubernetes docs `_, +including instructions to make the node schedulable again. + +To manually upgrade the host OS, +see the docs for that OS. + +To manually upgrade Docker, see +`the Docker docs `_. + +To manually upgrade all Kubernetes software in your Kubernetes cluster, see +`the Kubernetes docs `_. + + +Upgrade BigchainDB-Related Software +----------------------------------- + +We use Kubernetes "Deployments" for NGINX, BigchainDB, +and most other BigchainDB-related software. +The only exception is MongoDB; we use a Kubernetes +StatefulSet for that. + +The nice thing about Kubernetes Deployments +is that Kubernetes can manage most of the upgrade process. +A typical upgrade workflow for a single Deployment would be: + +.. code:: + + $ KUBE_EDITOR=nano kubectl edit deployment/<deployment name> + +The ``kubectl edit`` command +opens the specified editor (nano in the above example), +allowing you to edit the specified Deployment *in the Kubernetes cluster*. +You can change the version tag on the Docker image, for example. +Don't forget to save your edits before exiting the editor. +The Kubernetes docs have more information about +`Deployments `_ (including updating them). + + +The upgrade story for the MongoDB StatefulSet is *different*. +(This is because MongoDB has persistent state, +which is stored in some storage associated with a PersistentVolumeClaim.)
+At the time of writing, StatefulSets were still in beta, +and they did not support automated image upgrade (Docker image tag upgrade). +We expect that to change. +Rather than trying to keep these docs up-to-date, +we advise you to check out the current +`Kubernetes docs about updating containers in StatefulSets +`_. + + diff --git a/docs/server/source/clusters-feds/aws-testing-cluster.md b/docs/server/source/clusters-feds/aws-testing-cluster.md index ac1deff1..497d20a2 100644 --- a/docs/server/source/clusters-feds/aws-testing-cluster.md +++ b/docs/server/source/clusters-feds/aws-testing-cluster.md @@ -1,6 +1,6 @@ -# Deploy a Testing Cluster on AWS +# Deploy a RethinkDB-Based Testing Cluster on AWS -This section explains a way to deploy a cluster of BigchainDB nodes on Amazon Web Services (AWS) for testing purposes. +This section explains a way to deploy a _RethinkDB-based_ cluster of BigchainDB nodes on Amazon Web Services (AWS) for testing purposes. ## Why? @@ -86,7 +86,6 @@ Step 2 is to make an AWS deployment configuration file, if necessary. There's an ```text NUM_NODES=3 BRANCH="master" -WHAT_TO_DEPLOY="servers" SSH_KEY_NAME="not-set-yet" USE_KEYPAIRS_FILE=False IMAGE_ID="ami-8504fdea" diff --git a/docs/server/source/clusters-feds/index.rst b/docs/server/source/clusters-feds/index.rst index d13221ce..40e3b873 100644 --- a/docs/server/source/clusters-feds/index.rst +++ b/docs/server/source/clusters-feds/index.rst @@ -1,10 +1,9 @@ -Clusters & Federations -====================== +Clusters +======== .. toctree:: :maxdepth: 1 - set-up-a-federation - backup + set-up-a-cluster aws-testing-cluster diff --git a/docs/server/source/clusters-feds/set-up-a-cluster.md b/docs/server/source/clusters-feds/set-up-a-cluster.md new file mode 100644 index 00000000..4b02bd9f --- /dev/null +++ b/docs/server/source/clusters-feds/set-up-a-cluster.md @@ -0,0 +1,28 @@ +# Set Up a Cluster + +This section is about how to set up a BigchainDB cluster where each node is operated by a different operator. If you want to set up and run a testing cluster on AWS (where all nodes are operated by you), then see [the section about that](aws-testing-cluster.html). + + +## Initial Questions + +There are many questions that must be answered before setting up a BigchainDB cluster. For example: + +* Do you have a governance process for making consortium-level decisions, such as how to admit new members? +* What will you store in creation transactions (data payload)? Is there a data schema? +* Will you use transfer transactions? Will they include a non-empty data payload? +* Who will be allowed to submit transactions? Who will be allowed to read or query transactions? How will you enforce the access rules? + + +## Set Up the Initial Cluster + +The consortium must decide some things before setting up the initial cluster (initial set of BigchainDB nodes): + +1. Who will operate each node in the initial cluster? +2. What will the replication factor be? (It should be 3 or more.) +3. Who will deploy the first node, second node, etc.? + +Once those things have been decided, the cluster deployment process can begin. The process for deploying a production node is outlined in [the section on production nodes](../production-nodes/index.html). + +Every time a new BigchainDB node is added, every other node must update their [BigchainDB keyring](../server-reference/configuration.html#keyring) (one of the BigchainDB configuration settings): they must add the public key of the new node. 
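One way an operator can do that (a sketch, not the only way) is via the `BIGCHAINDB_KEYRING` environment variable, which BigchainDB reads as a colon-delimited list of the public keys of all *other* nodes in the cluster; the angle-bracket values below are illustrative placeholders:

```text
export BIGCHAINDB_KEYRING=<public key of node 1>:<public key of node 2>
```

After updating the keyring, the node's BigchainDB process must be restarted for the change to take effect, since the configuration is read at startup.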
+ +To secure communications between BigchainDB nodes, each BigchainDB node can use a firewall or similar, and doing that will require additional coordination. diff --git a/docs/server/source/clusters-feds/set-up-a-federation.md b/docs/server/source/clusters-feds/set-up-a-federation.md deleted file mode 100644 index ed1ddd1a..00000000 --- a/docs/server/source/clusters-feds/set-up-a-federation.md +++ /dev/null @@ -1,28 +0,0 @@ -# Set Up a Federation - -This section is about how to set up a BigchainDB _federation_, where each node is operated by a different operator. If you want to set up and run a testing cluster on AWS (where all nodes are operated by you), then see [the section about that](aws-testing-cluster.html). - - -## Initial Checklist - -* Do you have a governance process for making federation-level decisions, such as how to admit new members? -* What will you store in creation transactions (data payload)? Is there a data schema? -* Will you use transfer transactions? Will they include a non-empty data payload? -* Who will be allowed to submit transactions? Who will be allowed to read or query transactions? How will you enforce the access rules? - - -## Set Up the Initial Cluster - -The federation must decide some things before setting up the initial cluster (initial set of BigchainDB nodes): - -1. Who will operate a node in the initial cluster? -2. What will the replication factor be? (It must be 3 or more for [RethinkDB failover](https://rethinkdb.com/docs/failover/) to work.) -3. Which node will be responsible for sending the commands to configure the RethinkDB database? - -Once those things have been decided, each node operator can begin setting up their BigchainDB (production) node. - -Each node operator will eventually need two pieces of information from all other nodes in the federation: - -1. Their RethinkDB hostname, e.g. `rdb.farm2.organization.org` -2. Their BigchainDB public key, e.g. `Eky3nkbxDTMgkmiJC8i5hKyVFiAQNmPP4a2G4JdDxJCK` - diff --git a/docs/server/source/conf.py b/docs/server/source/conf.py index 5550e994..756a8d13 100644 --- a/docs/server/source/conf.py +++ b/docs/server/source/conf.py @@ -82,7 +82,7 @@ master_doc = 'index' # General information about the project. project = 'BigchainDB Server' -copyright = '2016' +copyright = '2017, BigchainDB Contributors' author = 'BigchainDB Contributors' # The version info for the project you're documenting, acts as replacement for diff --git a/docs/server/source/data-models/block-model.rst b/docs/server/source/data-models/block-model.rst index 3c94fca1..8b184261 100644 --- a/docs/server/source/data-models/block-model.rst +++ b/docs/server/source/data-models/block-model.rst @@ -11,7 +11,7 @@ A block has the following structure: "timestamp": "", "transactions": [""], "node_pubkey": "", - "voters": [""] + "voters": [""] }, "signature": "" } @@ -23,9 +23,9 @@ A block has the following structure: - ``timestamp``: The Unix time when the block was created. It's provided by the node that created the block. - ``transactions``: A list of the transactions included in the block. - ``node_pubkey``: The public key of the node that created the block. - - ``voters``: A list of the public keys of federation nodes at the time the block was created. - It's the list of federation nodes which can cast a vote on this block. - This list can change from block to block, as nodes join and leave the federation. + - ``voters``: A list of the public keys of all cluster nodes at the time the block was created. 
+  It's the list of nodes which can cast a vote on this block.
+  This list can change from block to block, as nodes join and leave the cluster.
 - ``signature``: :ref:`Cryptographic signature ` of the block by the node that created the block (i.e. the node with public key ``node_pubkey``). To generate the signature, the node signs the serialized inner ``block`` (the same thing that was hashed to determine the ``id``) using the private key corresponding to ``node_pubkey``.
diff --git a/docs/server/source/data-models/inputs-outputs.rst b/docs/server/source/data-models/inputs-outputs.rst
index 9f1b5d56..4309a4c8 100644
--- a/docs/server/source/data-models/inputs-outputs.rst
+++ b/docs/server/source/data-models/inputs-outputs.rst
@@ -22,7 +22,19 @@ One can also put different weights on the inputs to a threshold condition, along
 
 The (single) output of a threshold condition can be used as one of the inputs of other threshold conditions. This means that one can combine threshold conditions to build complex logical expressions, e.g. (x OR y) AND (u OR v).
 
-When one creates a condition, one can calculate its fulfillment length (e.g. 96). The more complex the condition, the larger its fulfillment length will be. A BigchainDB federation can put an upper limit on the allowed fulfillment length, as a way of capping the complexity of conditions (and the computing time required to validate them).
+When one creates a condition, one can calculate its fulfillment length (e.g.
+96). The more complex the condition, the larger its fulfillment length will be.
+A BigchainDB federation can put an upper limit on the complexity of the
+conditions, either directly by setting a maximum allowed fulfillment length,
+or
+`indirectly `_
+by :ref:`setting a maximum allowed transaction size `
+which would limit
+the overall complexity across all inputs and outputs of a transaction.
+Note: At the time of writing, there was no configuration setting
+to set a maximum allowed fulfillment length,
+so the only real option was to
+:ref:`set a maximum allowed transaction size `.
 
 If someone tries to make a condition where the output of a threshold condition feeds into the input of another “earlier” threshold condition (i.e. in a closed logical circuit), then their computer will take forever to calculate the (infinite) “condition URI”, at least in theory. In practice, their computer will run out of memory or their client software will time out after a while.
diff --git a/docs/server/source/dev-and-test/setup-run-node.md b/docs/server/source/dev-and-test/setup-run-node.md
index bb7285b4..1b60e3c3 100644
--- a/docs/server/source/dev-and-test/setup-run-node.md
+++ b/docs/server/source/dev-and-test/setup-run-node.md
@@ -23,7 +23,9 @@ Start RethinkDB using:
 $ rethinkdb
 ```
 
-You can verify that RethinkDB is running by opening the RethinkDB web interface in your web browser. It should be at [http://localhost:8080/](http://localhost:8080/).
+You can verify that RethinkDB is running by opening the RethinkDB web interface in your web browser. It should be at http://localhost:8080/
+
+
 To run BigchainDB Server, do:
 ```text
@@ -87,28 +89,28 @@ Start RethinkDB:
 docker-compose up -d rdb
 ```
 
-The RethinkDB web interface should be accessible at .
+The RethinkDB web interface should be accessible at http://localhost:58080/.
 Depending on which platform, and/or how you are running docker, you may need to
 replace `localhost` with the `ip` of the machine that is running docker.
As a dummy example, if the `ip` of that machine was `0.0.0.0`, you would access the -web interface at: . +web interface at: http://0.0.0.0:58080/. Start a BigchainDB node: ```bash -docker-compose up -d bdb +docker-compose up -d bdb-rdb ``` You can monitor the logs: ```bash -docker-compose logs -f bdb +docker-compose logs -f bdb-rdb ``` If you wish to run the tests: ```bash -docker-compose run --rm bdb py.test -v -n auto +docker-compose run --rm bdb-rdb py.test -v -n auto ``` ### Docker with MongoDB @@ -128,19 +130,19 @@ $ docker-compose port mdb 27017 Start a BigchainDB node: ```bash -docker-compose up -d bdb-mdb +docker-compose up -d bdb ``` You can monitor the logs: ```bash -docker-compose logs -f bdb-mdb +docker-compose logs -f bdb ``` If you wish to run the tests: ```bash -docker-compose run --rm bdb-mdb py.test -v --database-backend=mongodb +docker-compose run --rm bdb py.test -v --database-backend=mongodb ``` ### Accessing the HTTP API diff --git a/docs/server/source/drivers-clients/index.rst b/docs/server/source/drivers-clients/index.rst index 704832c0..127d6309 100644 --- a/docs/server/source/drivers-clients/index.rst +++ b/docs/server/source/drivers-clients/index.rst @@ -14,18 +14,16 @@ community projects listed below. .. toctree:: :maxdepth: 1 - http-client-server-api - websocket-event-stream-api The Python Driver Transaction CLI -Community Driven Libraries and Tools +Community-Driven Libraries and Tools ------------------------------------ Please note that some of these projects may be work in progress, but may nevertheless be very useful. * `Javascript transaction builder `_ -* `Haskell transaction builder `_ +* `Haskell transaction builder `_ * `Go driver `_ * `Java driver `_ diff --git a/docs/server/source/drivers-clients/http-client-server-api.rst b/docs/server/source/http-client-server-api.rst similarity index 89% rename from docs/server/source/drivers-clients/http-client-server-api.rst rename to docs/server/source/http-client-server-api.rst index 26ccd2f5..eb462d25 100644 --- a/docs/server/source/drivers-clients/http-client-server-api.rst +++ b/docs/server/source/http-client-server-api.rst @@ -22,7 +22,7 @@ or ``https://example.com:9984`` then you should get an HTTP response with something like the following in the body: -.. literalinclude:: samples/index-response.http +.. literalinclude:: http-samples/index-response.http :language: http @@ -35,7 +35,7 @@ or ``https://example.com:9984/api/v1/``, then you should get an HTTP response that allows you to discover the BigchainDB API endpoints: -.. literalinclude:: samples/api-index-response.http +.. literalinclude:: http-samples/api-index-response.http :language: http @@ -46,20 +46,24 @@ Transactions Get the transaction with the ID ``tx_id``. - This endpoint returns a transaction only if a ``VALID`` block on - ``bigchain`` exists. + This endpoint returns a transaction if it was included in a ``VALID`` block, + if it is still waiting to be processed (``BACKLOG``) or is still in an + undecided block (``UNDECIDED``). All instances of a transaction in invalid + blocks are ignored and treated as if they don't exist. If a request is made + for a transaction and instances of that transaction are found only in + invalid blocks, then the response will be ``404 Not Found``. :param tx_id: transaction ID :type tx_id: hex string **Example request**: - .. literalinclude:: samples/get-tx-id-request.http + .. literalinclude:: http-samples/get-tx-id-request.http :language: http **Example response**: - .. 
literalinclude:: samples/get-tx-id-response.http + .. literalinclude:: http-samples/get-tx-id-response.http :language: http :resheader Content-Type: ``application/json`` @@ -106,12 +110,12 @@ Transactions **Example request**: - .. literalinclude:: samples/get-tx-by-asset-request.http + .. literalinclude:: http-samples/get-tx-by-asset-request.http :language: http **Example response**: - .. literalinclude:: samples/get-tx-by-asset-response.http + .. literalinclude:: http-samples/get-tx-by-asset-response.http :language: http :resheader Content-Type: ``application/json`` @@ -135,12 +139,12 @@ Transactions **Example request**: - .. literalinclude:: samples/post-tx-request.http + .. literalinclude:: http-samples/post-tx-request.http :language: http **Example response**: - .. literalinclude:: samples/post-tx-response.http + .. literalinclude:: http-samples/post-tx-response.http :language: http :resheader Content-Type: ``application/json`` @@ -223,12 +227,12 @@ Statuses **Example request**: - .. literalinclude:: samples/get-statuses-tx-request.http + .. literalinclude:: http-samples/get-statuses-tx-request.http :language: http **Example response**: - .. literalinclude:: samples/get-statuses-tx-valid-response.http + .. literalinclude:: http-samples/get-statuses-tx-valid-response.http :language: http :resheader Content-Type: ``application/json`` @@ -246,17 +250,17 @@ Statuses **Example request**: - .. literalinclude:: samples/get-statuses-block-request.http + .. literalinclude:: http-samples/get-statuses-block-request.http :language: http **Example response**: - .. literalinclude:: samples/get-statuses-block-invalid-response.http + .. literalinclude:: http-samples/get-statuses-block-invalid-response.http :language: http **Example response**: - .. literalinclude:: samples/get-statuses-block-valid-response.http + .. literalinclude:: http-samples/get-statuses-block-valid-response.http :language: http :resheader Content-Type: ``application/json`` @@ -294,12 +298,12 @@ Blocks **Example request**: - .. literalinclude:: samples/get-block-request.http + .. literalinclude:: http-samples/get-block-request.http :language: http **Example response**: - .. literalinclude:: samples/get-block-response.http + .. literalinclude:: http-samples/get-block-response.http :language: http @@ -349,12 +353,12 @@ Blocks **Example request**: - .. literalinclude:: samples/get-block-txid-request.http + .. literalinclude:: http-samples/get-block-txid-request.http :language: http **Example response**: - .. literalinclude:: samples/get-block-txid-response.http + .. literalinclude:: http-samples/get-block-txid-response.http :language: http :resheader Content-Type: ``application/json`` @@ -380,12 +384,12 @@ Votes **Example request**: - .. literalinclude:: samples/get-vote-request.http + .. literalinclude:: http-samples/get-vote-request.http :language: http **Example response**: - .. literalinclude:: samples/get-vote-response.http + .. literalinclude:: http-samples/get-vote-response.http :language: http :resheader Content-Type: ``application/json`` @@ -402,7 +406,7 @@ Determining the API Root URL When you start BigchainDB Server using ``bigchaindb start``, an HTTP API is exposed at some address. 
The default is: -`http://localhost:9984/api/v1/ `_ +``http://localhost:9984/api/v1/`` It's bound to ``localhost``, so you can access it from the same machine, diff --git a/docs/server/source/index.rst b/docs/server/source/index.rst index 7f85a228..bf45aca0 100644 --- a/docs/server/source/index.rst +++ b/docs/server/source/index.rst @@ -8,12 +8,13 @@ BigchainDB Server Documentation introduction quickstart cloud-deployment-templates/index - nodes/index + production-nodes/index dev-and-test/index server-reference/index + http-client-server-api + websocket-event-stream-api drivers-clients/index clusters-feds/index - topic-guides/index data-models/index schema/transaction schema/vote diff --git a/docs/server/source/introduction.md b/docs/server/source/introduction.md index b9e6bf0a..02cf5ecf 100644 --- a/docs/server/source/introduction.md +++ b/docs/server/source/introduction.md @@ -10,7 +10,7 @@ Note that there are a few kinds of nodes: - A **bare-bones node** is a node deployed in the cloud, either as part of a testing cluster or as a starting point before upgrading the node to be production-ready. Our cloud deployment templates deploy a bare-bones node, as do our scripts for deploying a testing cluster on AWS. -- A **production node** is a node that is part of a federation's BigchainDB cluster. A production node has the most components and requirements. +- A **production node** is a node that is part of a consortium's BigchainDB cluster. A production node has the most components and requirements. ## Setup Instructions for Various Cases @@ -19,7 +19,7 @@ Note that there are a few kinds of nodes: * [Set up and run a bare-bones node in the cloud](cloud-deployment-templates/index.html) * [Set up and run a local dev/test node for developing and testing BigchainDB Server](dev-and-test/setup-run-node.html) * [Deploy a testing cluster on AWS](clusters-feds/aws-testing-cluster.html) -* [Set up and run a federation (including production nodes)](clusters-feds/set-up-a-federation.html) +* [Set up and run a cluster (including production nodes)](clusters-feds/set-up-a-cluster.html) Instructions for setting up a client will be provided once there's a public test net. diff --git a/docs/server/source/nodes/index.rst b/docs/server/source/nodes/index.rst deleted file mode 100644 index 1c3671f0..00000000 --- a/docs/server/source/nodes/index.rst +++ /dev/null @@ -1,10 +0,0 @@ -Production Node Assumptions, Components & Requirements -====================================================== - -.. toctree:: - :maxdepth: 1 - - node-assumptions - node-components - node-requirements - setup-run-node diff --git a/docs/server/source/nodes/node-assumptions.md b/docs/server/source/nodes/node-assumptions.md deleted file mode 100644 index f7e8379f..00000000 --- a/docs/server/source/nodes/node-assumptions.md +++ /dev/null @@ -1,13 +0,0 @@ -# Production Node Assumptions - -If you're not sure what we mean by a BigchainDB *node*, *cluster*, *federation*, or *production node*, then see [the section in the Introduction where we defined those terms](../introduction.html#some-basic-vocabulary). - -We make some assumptions about production nodes: - -1. **Each production node is set up and managed by an experienced professional system administrator (or a team of them).** - -2. Each production node in a federation's cluster is managed by a different person or team. - -Because of the first assumption, we don't provide a detailed cookbook explaining how to secure a server, or other things that a sysadmin should know. 
(We do provide some [templates](../cloud-deployment-templates/index.html), but those are just a starting point.) - - diff --git a/docs/server/source/nodes/node-components.md b/docs/server/source/nodes/node-components.md deleted file mode 100644 index 9d6b41ec..00000000 --- a/docs/server/source/nodes/node-components.md +++ /dev/null @@ -1,23 +0,0 @@ -# Production Node Components - -A BigchainDB node must include, at least: - -* BigchainDB Server and -* RethinkDB Server. - -When doing development and testing, it's common to install both on the same machine, but in a production environment, it may make more sense to install them on separate machines. - -In a production environment, a BigchainDB node should have several other components, including: - -* nginx or similar, as a reverse proxy and/or load balancer for the Gunicorn server(s) inside the node -* An NTP daemon running on all machines running BigchainDB code, and possibly other machines -* A RethinkDB proxy server -* A RethinkDB "wire protocol firewall" (in the future: this component doesn't exist yet) -* Scalable storage for RethinkDB (e.g. using RAID) -* Monitoring software, to monitor all the machines in the node -* Configuration management agents (if you're using a configuration managment system that uses agents) -* Maybe more - -The relationship between these components is illustrated below. - -![Components of a node](../_static/Node-components.png) diff --git a/docs/server/source/nodes/setup-run-node.md b/docs/server/source/nodes/setup-run-node.md deleted file mode 100644 index 41a9cdd1..00000000 --- a/docs/server/source/nodes/setup-run-node.md +++ /dev/null @@ -1,193 +0,0 @@ -# Set Up and Run a Cluster Node - -This is a page of general guidelines for setting up a production node. It says nothing about how to upgrade software, storage, processing, etc. or other details of node management. It will be expanded more in the future. - - -## Get a Server - -The first step is to get a server (or equivalent) which meets [the requirements for a BigchainDB node](node-requirements.html). - - -## Secure Your Server - -The steps that you must take to secure your server depend on your server OS and where your server is physically located. There are many articles and books about how to secure a server. Here we just cover special considerations when securing a BigchainDB node. - -There are some [notes on BigchainDB-specific firewall setup](../appendices/firewall-notes.html) in the Appendices. - - -## Sync Your System Clock - -A BigchainDB node uses its system clock to generate timestamps for blocks and votes, so that clock should be kept in sync with some standard clock(s). The standard way to do that is to run an NTP daemon (Network Time Protocol daemon) on the node. (You could also use tlsdate, which uses TLS timestamps rather than NTP, but don't: it's not very accurate and it will break with TLS 1.3, which removes the timestamp.) - -NTP is a standard protocol. There are many NTP daemons implementing it. We don't recommend a particular one. On the contrary, we recommend that different nodes in a federation run different NTP daemons, so that a problem with one daemon won't affect all nodes. - -Please see the [notes on NTP daemon setup](../appendices/ntp-notes.html) in the Appendices. - - -## Set Up Storage for RethinkDB Data - -Below are some things to consider when setting up storage for the RethinkDB data. The Appendices have a [section with concrete examples](../appendices/example-rethinkdb-storage-setups.html). 
- -We suggest you set up a separate storage "device" (partition, RAID array, or logical volume) to store the RethinkDB data. Here are some questions to ask: - -* How easy will it be to add storage in the future? Will I have to shut down my server? -* How big can the storage get? (Remember that [RAID](https://en.wikipedia.org/wiki/RAID) can be used to make several physical drives look like one.) -* How fast can it read & write data? How many input/output operations per second (IOPS)? -* How does IOPS scale as more physical hard drives are added? -* What's the latency? -* What's the reliability? Is there replication? -* What's in the Service Level Agreement (SLA), if applicable? -* What's the cost? - -There are many options and tradeoffs. Don't forget to look into Amazon Elastic Block Store (EBS) and Amazon Elastic File System (EFS), or their equivalents from other providers. - -**Storage Notes Specific to RethinkDB** - -* The RethinkDB storage engine has a number of SSD optimizations, so you _can_ benefit from using SSDs. ([source](https://www.rethinkdb.com/docs/architecture/)) - -* If you want a RethinkDB cluster to store an amount of data D, with a replication factor of R (on every table), and the cluster has N nodes, then each node will need to be able to store R×D/N data. - -* RethinkDB tables can have [at most 64 shards](https://rethinkdb.com/limitations/). For example, if you have only one table and more than 64 nodes, some nodes won't have the primary of any shard, i.e. they will have replicas only. In other words, once you pass 64 nodes, adding more nodes won't provide more storage space for new data. If the biggest single-node storage available is d, then the most you can store in a RethinkDB cluster is < 64×d: accomplished by putting one primary shard in each of 64 nodes, with all replica shards on other nodes. (This is assuming one table. If there are T tables, then the most you can store is < 64×d×T.) - -* When you set up storage for your RethinkDB data, you may have to select a filesystem. (Sometimes, the filesystem is already decided by the choice of storage.) We recommend using a filesystem that supports direct I/O (Input/Output). Many compressed or encrypted file systems don't support direct I/O. The ext4 filesystem supports direct I/O (but be careful: if you enable the data=journal mode, then direct I/O support will be disabled; the default is data=ordered). If your chosen filesystem supports direct I/O and you're using Linux, then you don't need to do anything to request or enable direct I/O. RethinkDB does that. - -

What is direct I/O? It allows RethinkDB to write directly to the storage device (or use its own in-memory caching mechanisms), rather than relying on the operating system's file read and write caching mechanisms. (If you're using Linux, a write-to-file normally writes to the in-memory Page Cache first; only later does that Page Cache get flushed to disk. The Page Cache is also used when reading files.)

- -* RethinkDB stores its data in a specific directory. You can tell RethinkDB _which_ directory using the RethinkDB config file, as explained below. In this documentation, we assume the directory is `/data`. If you set up a separate device (partition, RAID array, or logical volume) to store the RethinkDB data, then mount that device on `/data`. - - -## Install RethinkDB Server - -If you don't already have RethinkDB Server installed, you must install it. The RethinkDB documentation has instructions for [how to install RethinkDB Server on a variety of operating systems](https://rethinkdb.com/docs/install/). - - -## Configure RethinkDB Server - -Create a RethinkDB configuration file (text file) named `instance1.conf` with the following contents (explained below): -```text -directory=/data -bind=all -direct-io -# Replace node?_hostname with actual node hostnames below, e.g. rdb.examples.com -join=node0_hostname:29015 -join=node1_hostname:29015 -join=node2_hostname:29015 -# continue until there's a join= line for each node in the federation -``` - -* `directory=/data` tells the RethinkDB node to store its share of the database data in `/data`. -* `bind=all` binds RethinkDB to all local network interfaces (e.g. loopback, Ethernet, wireless, whatever is available), so it can communicate with the outside world. (The default is to bind only to local interfaces.) -* `direct-io` tells RethinkDB to use direct I/O (explained earlier). Only include this line if your file system supports direct I/O. -* `join=hostname:29015` lines: A cluster node needs to find out the hostnames of all the other nodes somehow. You _could_ designate one node to be the one that every other node asks, and put that node's hostname in the config file, but that wouldn't be very decentralized. Instead, we include _every_ node in the list of nodes-to-ask. - -If you're curious about the RethinkDB config file, there's [a RethinkDB documentation page about it](https://www.rethinkdb.com/docs/config-file/). The [explanations of the RethinkDB command-line options](https://rethinkdb.com/docs/cli-options/) are another useful reference. - -See the [RethinkDB documentation on securing your cluster](https://rethinkdb.com/docs/security/). - - -## Install Python 3.4+ - -If you don't already have it, then you should [install Python 3.4+](https://www.python.org/downloads/). - -If you're testing or developing BigchainDB on a stand-alone node, then you should probably create a Python 3.4+ virtual environment and activate it (e.g. using virtualenv or conda). Later we will install several Python packages and you probably only want those installed in the virtual environment. - - -## Install BigchainDB Server - -First, [install the OS-level dependencies of BigchainDB Server (link)](../appendices/install-os-level-deps.html). - -With OS-level dependencies installed, you can install BigchainDB Server with `pip` or from source. - - -### How to Install BigchainDB with pip - -BigchainDB (i.e. both the Server and the officially-supported drivers) is distributed as a Python package on PyPI so you can install it using `pip`. First, make sure you have an up-to-date Python 3.4+ version of `pip` installed: -```text -pip -V -``` - -If it says that `pip` isn't installed, or it says `pip` is associated with a Python version less than 3.4, then you must install a `pip` version associated with Python 3.4+. In the following instructions, we call it `pip3` but you may be able to use `pip` if that refers to the same thing. 
See [the `pip` installation instructions](https://pip.pypa.io/en/stable/installing/). - -On Ubuntu 16.04, we found that this works: -```text -sudo apt-get install python3-pip -``` - -That should install a Python 3 version of `pip` named `pip3`. If that didn't work, then another way to get `pip3` is to do `sudo apt-get install python3-setuptools` followed by `sudo easy_install3 pip`. - -You can upgrade `pip` (`pip3`) and `setuptools` to the latest versions using: -```text -pip3 install --upgrade pip setuptools -pip3 -V -``` - -Now you can install BigchainDB Server (and officially-supported BigchainDB drivers) using: -```text -pip3 install bigchaindb -``` - -(If you're not in a virtualenv and you want to install bigchaindb system-wide, then put `sudo` in front.) - -Note: You can use `pip3` to upgrade the `bigchaindb` package to the latest version using `pip3 install --upgrade bigchaindb`. - - -### How to Install BigchainDB from Source - -If you want to install BitchainDB from source because you want to use the very latest bleeding-edge code, clone the public repository: -```text -git clone git@github.com:bigchaindb/bigchaindb.git -python setup.py install -``` - - -## Configure BigchainDB Server - -Start by creating a default BigchainDB config file: -```text -bigchaindb -y configure rethinkdb -``` - -(There's documentation for the `bigchaindb` command is in the section on [the BigchainDB Command Line Interface (CLI)](bigchaindb-cli.html).) - -Edit the created config file: - -* Open `$HOME/.bigchaindb` (the created config file) in your text editor. -* Change `"server": {"bind": "localhost:9984", ... }` to `"server": {"bind": "0.0.0.0:9984", ... }`. This makes it so traffic can come from any IP address to port 9984 (the HTTP Client-Server API port). -* Change `"keyring": []` to `"keyring": ["public_key_of_other_node_A", "public_key_of_other_node_B", "..."]` i.e. a list of the public keys of all the other nodes in the federation. The keyring should _not_ include your node's public key. - -For more information about the BigchainDB config file, see [Configuring a BigchainDB Node](configuration.html). - - -## Run RethinkDB Server - -Start RethinkDB using: -```text -rethinkdb --config-file path/to/instance1.conf -``` - -except replace the path with the actual path to `instance1.conf`. - -Note: It's possible to [make RethinkDB start at system startup](https://www.rethinkdb.com/docs/start-on-startup/). - -You can verify that RethinkDB is running by opening the RethinkDB web interface in your web browser. It should be at `http://rethinkdb-hostname:8080/`. If you're running RethinkDB on localhost, that would be [http://localhost:8080/](http://localhost:8080/). - - -## Run BigchainDB Server - -After all node operators have started RethinkDB, but before they start BigchainDB, one designated node operator must configure the RethinkDB database by running the following commands: -```text -bigchaindb init -bigchaindb set-shards numshards -bigchaindb set-replicas numreplicas -``` - -where: - -* `bigchaindb init` creates the database within RethinkDB, the tables, the indexes, and the genesis block. -* `numshards` should be set to the number of nodes in the initial cluster. -* `numreplicas` should be set to the database replication factor decided by the federation. It must be 3 or more for [RethinkDB failover](https://rethinkdb.com/docs/failover/) to work. 
-
-Once the RethinkDB database is configured, every node operator can start BigchainDB using:
-```text
-bigchaindb start
-```
diff --git a/docs/server/source/production-nodes/index.rst b/docs/server/source/production-nodes/index.rst
new file mode 100644
index 00000000..4a9cb15b
--- /dev/null
+++ b/docs/server/source/production-nodes/index.rst
@@ -0,0 +1,12 @@
+Production Nodes
+================
+
+.. toctree::
+   :maxdepth: 1
+
+   node-assumptions
+   node-components
+   node-requirements
+   setup-run-node
+   reverse-proxy-notes
+   
\ No newline at end of file
diff --git a/docs/server/source/production-nodes/node-assumptions.md b/docs/server/source/production-nodes/node-assumptions.md
new file mode 100644
index 00000000..9d52aa5a
--- /dev/null
+++ b/docs/server/source/production-nodes/node-assumptions.md
@@ -0,0 +1,16 @@
+# Production Node Assumptions
+
+Be sure you know the key BigchainDB terminology:
+
+* [BigchainDB node, BigchainDB cluster and BigchainDB consortium](https://docs.bigchaindb.com/en/latest/terminology.html)
+* [dev/test node, bare-bones node and production node](../introduction.html)
+
+We make some assumptions about production nodes:
+
+1. Production nodes use MongoDB, not RethinkDB.
+1. Each production node is set up and managed by an experienced professional system administrator or a team of them.
+1. Each production node in a cluster is managed by a different person or team.
+
+You can use RethinkDB when building prototypes, but we don't advise or support using it in production.
+
+We don't provide a detailed cookbook explaining how to secure a server, or other things that a sysadmin should know. (We do provide some [templates](../cloud-deployment-templates/index.html), but those are just a starting point.)
diff --git a/docs/server/source/production-nodes/node-components.md b/docs/server/source/production-nodes/node-components.md
new file mode 100644
index 00000000..d7d4e85b
--- /dev/null
+++ b/docs/server/source/production-nodes/node-components.md
@@ -0,0 +1,22 @@
+# Production Node Components
+
+A production BigchainDB node must include:
+
+* BigchainDB Server
+* MongoDB Server 3.4+ (mongod)
+* Scalable storage for MongoDB
+
+It could also include several other components, including:
+
+* NGINX or similar, to provide authentication, rate limiting, etc.
+* An NTP daemon running on all machines running BigchainDB Server or mongod, and possibly other machines
+* **Not** MongoDB Automation Agent. It's for automating the deployment of an entire MongoDB cluster, not just one MongoDB node within a cluster.
+* MongoDB Monitoring Agent
+* MongoDB Backup Agent
+* Log aggregation software
+* Monitoring software
+* Maybe more
+
+The relationship between the main components is illustrated below. Note that BigchainDB Server must be able to communicate with the _primary_ MongoDB instance, and any of the MongoDB instances might be the primary, so BigchainDB Server must be able to communicate with all the MongoDB instances. Also, all MongoDB instances must be able to communicate with each other.
+
+![Components of a production node](../_static/Node-components.png)
diff --git a/docs/server/source/production-nodes/node-requirements.md b/docs/server/source/production-nodes/node-requirements.md
new file mode 100644
index 00000000..9588747b
--- /dev/null
+++ b/docs/server/source/production-nodes/node-requirements.md
@@ -0,0 +1,17 @@
+# Production Node Requirements
+
+**This page is about the requirements of BigchainDB Server.** You can find the requirements of MongoDB, NGINX, your NTP daemon, your monitoring software, and other [production node components](node-components.html) in the documentation for that software.
+
+
+## OS Requirements
+
+BigchainDB Server requires Python 3.4+ and Python 3.4+ [will run on any modern OS](https://docs.python.org/3.4/using/index.html), but we recommend using an LTS version of [Ubuntu Server](https://www.ubuntu.com/server) or a similarly server-grade Linux distribution.
+
+_Don't use macOS_ (formerly OS X, formerly Mac OS X), because it's not a server-grade operating system. Also, BigchainDB Server uses the Python multiprocessing package and [some functionality in the multiprocessing package doesn't work on Mac OS X](https://docs.python.org/3.4/library/multiprocessing.html#multiprocessing.Queue.qsize).
+
+
+## General Considerations
+
+BigchainDB Server runs many concurrent processes, so more RAM and more CPU cores are better.
+
+As mentioned on the page about [production node components](node-components.html), every machine running BigchainDB Server should be running an NTP daemon.
diff --git a/docs/server/source/production-nodes/reverse-proxy-notes.md b/docs/server/source/production-nodes/reverse-proxy-notes.md
new file mode 100644
index 00000000..18930942
--- /dev/null
+++ b/docs/server/source/production-nodes/reverse-proxy-notes.md
@@ -0,0 +1,72 @@
+# Using a Reverse Proxy
+
+You may want to:
+
+* rate-limit inbound HTTP requests,
+* authenticate/authorize inbound HTTP requests,
+* block requests with an HTTP request body that's too large, or
+* enable HTTPS (TLS) between your users and your node.
+
+While we could have built all that into BigchainDB Server,
+we didn't, because you can do all that (and more)
+using a reverse proxy such as NGINX or HAProxy.
+(You would put it in front of your BigchainDB Server,
+so that all inbound HTTP requests would arrive
+at the reverse proxy before *maybe* being proxied
+onwards to your BigchainDB Server.)
+For detailed instructions, see the documentation
+for your reverse proxy.
+
+Below, we note how a reverse proxy can be used
+to do some BigchainDB-specific things.
+
+You may also be interested in
+[our NGINX configuration file template](https://github.com/bigchaindb/nginx_3scale/blob/master/nginx.conf.template)
+(open source, on GitHub).
+
+
+## Enforcing a Max Transaction Size
+
+The BigchainDB HTTP API has several endpoints,
+but only one of them, the `POST /transactions` endpoint,
+expects a non-empty HTTP request body:
+the transaction (JSON) being submitted by the user.
+
+If you want to enforce a maximum-allowed transaction size
+(discarding any that are larger),
+then you can do so by configuring a maximum request body size
+in your reverse proxy.
+For example, NGINX has the `client_max_body_size`
+configuration setting. You could set it to 15 kB
+with the following line in your NGINX config file:
+
+```text
+client_max_body_size 15k;
+```
+
+For more information, see
+[the NGINX docs about client_max_body_size](https://nginx.org/en/docs/http/ngx_http_core_module.html#client_max_body_size).
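+
+As a rough sketch (not a complete NGINX config; the port numbers follow the reverse-proxy advice elsewhere in these docs, with the proxy listening on 9984 and Gunicorn on 9983), the setting would sit in the `server` block that proxies requests to BigchainDB Server:
+
+```text
+server {
+    listen 9984;
+    # Reject request bodies (i.e. transactions) bigger than 15 kB
+    client_max_body_size 15k;
+
+    location / {
+        proxy_pass http://localhost:9983;
+    }
+}
+```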
+
+Note: By enforcing a maximum transaction size, you
+[indirectly enforce a maximum crypto-conditions complexity](https://github.com/bigchaindb/bigchaindb/issues/356#issuecomment-288085251).
+
+
+**Aside: Why 15 kB?**
+
+Both [RethinkDB](https://rethinkdb.com/limitations/) and
+[MongoDB have a maximum document size of 16 MB](https://docs.mongodb.com/manual/reference/limits/#limit-bson-document-size).
+In BigchainDB, the biggest documents are the blocks.
+A BigchainDB block can contain up to 1000 transactions,
+plus some other data (e.g. the timestamp).
+If we ignore the other data as negligible relative to all the transactions,
+then a block of size 16 MB
+will have an average transaction size of (16 MB)/1000 = 16 kB.
+Therefore by limiting the max transaction size to 15 kB,
+you can be fairly sure that no blocks will ever be
+bigger than 16 MB.
+
+Note: Technically, the documents that MongoDB stores aren't the JSON
+that BigchainDB users think of; they're JSON converted to BSON.
+Moreover, [one can use GridFS with MongoDB to store larger documents](https://docs.mongodb.com/manual/core/gridfs/).
+Therefore the above calculation should be seen as a rough guide,
+not the last word.
diff --git a/docs/server/source/production-nodes/setup-run-node.md b/docs/server/source/production-nodes/setup-run-node.md
new file mode 100644
index 00000000..6e7ddbea
--- /dev/null
+++ b/docs/server/source/production-nodes/setup-run-node.md
@@ -0,0 +1,137 @@
+# Set Up and Run a Cluster Node
+
+This is a page of general guidelines for setting up a production BigchainDB node. Before continuing, make sure you've read the pages about production node [assumptions](node-assumptions.html), [components](node-components.html) and [requirements](node-requirements.html).
+
+Note: These are just guidelines. You can modify them to suit your needs. For example, if you want to initialize the MongoDB replica set before installing BigchainDB, you _can_ do that. If you'd prefer to use Docker and Kubernetes, you can (and [we have a template](../cloud-deployment-templates/node-on-kubernetes.html)). We don't cover all possible setup procedures here.
+
+
+## Security Guidelines
+
+There are many articles, websites and books about securing servers, virtual machines, networks, etc. Consult those.
+There are some [notes on BigchainDB-specific firewall setup](../appendices/firewall-notes.html) in the Appendices.
+
+
+## Sync Your System Clock
+
+A BigchainDB node uses its system clock to generate timestamps for blocks and votes, so that clock should be kept in sync with some standard clock(s). The standard way to do that is to run an NTP daemon (Network Time Protocol daemon) on the node.
+
+MongoDB also recommends having an NTP daemon running on all MongoDB nodes.
+
+NTP is a standard protocol. There are many NTP daemons implementing it. We don't recommend a particular one. On the contrary, we recommend that different nodes in a cluster run different NTP daemons, so that a problem with one daemon won't affect all nodes.
+
+Please see the [notes on NTP daemon setup](../appendices/ntp-notes.html) in the Appendices.
+
+
+## Set Up Storage for MongoDB
+
+We suggest you set up a separate storage device (partition, RAID array, or logical volume) to store the data in the MongoDB database. Here are some questions to ask:
+
+* How easy will it be to add storage in the future? Will I have to shut down my server?
+* How big can the storage get?
(Remember that [RAID](https://en.wikipedia.org/wiki/RAID) can be used to make several physical drives look like one.)
+* How fast can it read & write data? How many input/output operations per second (IOPS)?
+* How does IOPS scale as more physical hard drives are added?
+* What's the latency?
+* What's the reliability? Is there replication?
+* What's in the Service Level Agreement (SLA), if applicable?
+* What's the cost?
+
+There are many options and tradeoffs.
+
+Consult the MongoDB documentation for its recommendations regarding storage hardware, software and settings, e.g. in the [MongoDB Production Notes](https://docs.mongodb.com/manual/administration/production-notes/).
+
+
+## Install and Run MongoDB
+
+* [Install MongoDB 3.4+](https://docs.mongodb.com/manual/installation/). (BigchainDB only works with MongoDB 3.4+.)
+* [Run MongoDB (mongod)](https://docs.mongodb.com/manual/reference/program/mongod/)
+
+
+## Install BigchainDB Server
+
+### Install BigchainDB Server Dependencies
+
+Before you can install BigchainDB Server, you must [install its OS-level dependencies](../appendices/install-os-level-deps.html) and you may have to [install Python 3.4+](https://www.python.org/downloads/).
+
+### How to Install BigchainDB Server with pip
+
+BigchainDB is distributed as a Python package on PyPI so you can install it using `pip`. First, make sure you have an up-to-date Python 3.4+ version of `pip` installed:
+```text
+pip -V
+```
+
+If it says that `pip` isn't installed, or it says `pip` is associated with a Python version less than 3.4, then you must install a `pip` version associated with Python 3.4+. In the following instructions, we call it `pip3` but you may be able to use `pip` if that refers to the same thing. See [the `pip` installation instructions](https://pip.pypa.io/en/stable/installing/).
+
+On Ubuntu 16.04, we found that this works:
+```text
+sudo apt-get install python3-pip
+```
+
+That should install a Python 3 version of `pip` named `pip3`. If that didn't work, then another way to get `pip3` is to do `sudo apt-get install python3-setuptools` followed by `sudo easy_install3 pip`.
+
+You can upgrade `pip` (`pip3`) and `setuptools` to the latest versions using:
+```text
+pip3 install --upgrade pip setuptools
+pip3 -V
+```
+
+Now you can install BigchainDB Server using:
+```text
+pip3 install bigchaindb
+```
+
+(If you're not in a virtualenv and you want to install bigchaindb system-wide, then put `sudo` in front.)
+
+Note: You can use `pip3` to upgrade the `bigchaindb` package to the latest version using `pip3 install --upgrade bigchaindb`.
+
+
+### How to Install BigchainDB Server from Source
+
+If you want to install BigchainDB from source because you want to use the very latest bleeding-edge code, clone the public repository:
+```text
+git clone git@github.com:bigchaindb/bigchaindb.git
+cd bigchaindb
+python setup.py install
+```
+
+
+## Configure BigchainDB Server
+
+Start by creating a default BigchainDB config file for a MongoDB backend:
+```text
+bigchaindb -y configure mongodb
+```
+
+(There's documentation for the `bigchaindb` command in the section on [the BigchainDB Command Line Interface (CLI)](../server-reference/bigchaindb-cli.html).)
+
+Edit the created config file by opening `$HOME/.bigchaindb` in your text editor:
+
+* Change `"server": {"bind": "localhost:9984", ... }` to `"server": {"bind": "0.0.0.0:9984", ... }`. This makes it so traffic can come from any IP address to port 9984 (the HTTP Client-Server API port).
+* Change `"keyring": []` to `"keyring": ["public_key_of_other_node_A", "public_key_of_other_node_B", "..."]` i.e. a list of the public keys of all the other nodes in the cluster. The keyring should _not_ include your node's public key. +* Ensure that `database.host` and `database.port` are set to the hostname and port of your MongoDB instance. (The port is usually 27017, unless you changed it.) + +For more information about the BigchainDB config file, see the page about the [BigchainDB configuration settings](../server-reference/configuration.html). + + +## Get All Other Nodes to Update Their Keyring + +All other BigchainDB nodes in the cluster must add your new node's public key to their BigchainDB keyring. Currently, the only way to get BigchainDB Server to "notice" a changed keyring is to shut it down and start it back up again (with the new keyring). + + +## Maybe Update the MongoDB Replica Set + +**If this isn't the first node in the BigchainDB cluster**, then someone with an existing BigchainDB node (not you) must add your MongoDB instance to the MongoDB replica set. They can do so (on their node) using: +```text +bigchaindb add-replicas your-mongod-hostname:27017 +``` + +where they must replace `your-mongod-hostname` with the actual hostname of your MongoDB instance, and they may have to replace `27017` with the actual port. + + +## Start BigchainDB + +**Warning: If you're not deploying the first node in the BigchainDB cluster, then don't start BigchainDB before your MongoDB instance has been added to the MongoDB replica set (as outlined above).** + +```text +# See warning above +bigchaindb start +``` diff --git a/docs/server/source/server-reference/bigchaindb-cli.md b/docs/server/source/server-reference/bigchaindb-cli.md index 5fdf8fdf..05f321f9 100644 --- a/docs/server/source/server-reference/bigchaindb-cli.md +++ b/docs/server/source/server-reference/bigchaindb-cli.md @@ -68,16 +68,22 @@ You can also use the `--dev-start-rethinkdb` command line option to automaticall e.g. `bigchaindb --dev-start-rethinkdb start`. Note that this will also shutdown rethinkdb when the bigchaindb process stops. The option `--dev-allow-temp-keypair` will generate a keypair on the fly if no keypair is found, this is useful when you want to run a temporary instance of BigchainDB in a Docker container, for example. +### Options +The log level for the console can be set via the option `--log-level` or its +abbreviation `-l`. Example: -## bigchaindb load - -Write transactions to the backlog (for benchmarking tests). You can learn more about it using: -```text -$ bigchaindb load -h +```bash +$ bigchaindb --log-level INFO start ``` -Note: This command uses the Python Server API to write transactions to the database. It _doesn't_ use the HTTP API or a driver that wraps the HTTP API. +The allowed levels are `DEBUG`, `INFO` , `WARNING`, `ERROR`, and `CRITICAL`. +For an explanation regarding these levels please consult the +[Logging Levels](https://docs.python.org/3.6/library/logging.html#levels) +section of Python's documentation. +For a more fine-grained control over the logging configuration you can use the +configuration file as documented under +[Configuration Settings](configuration.html). 
## bigchaindb set-shards diff --git a/docs/server/source/server-reference/configuration.md b/docs/server/source/server-reference/configuration.md index f12b8247..053ed68b 100644 --- a/docs/server/source/server-reference/configuration.md +++ b/docs/server/source/server-reference/configuration.md @@ -16,12 +16,26 @@ For convenience, here's a list of all the relevant environment variables (docume `BIGCHAINDB_DATABASE_PORT`
`BIGCHAINDB_DATABASE_NAME`
`BIGCHAINDB_DATABASE_REPLICASET`
+`BIGCHAINDB_DATABASE_CONNECTION_TIMEOUT`
+`BIGCHAINDB_DATABASE_MAX_TRIES`
`BIGCHAINDB_SERVER_BIND`
+`BIGCHAINDB_SERVER_LOGLEVEL`
`BIGCHAINDB_SERVER_WORKERS`
`BIGCHAINDB_SERVER_THREADS`
+`BIGCHAINDB_WSSERVER_HOST`
+`BIGCHAINDB_WSSERVER_PORT`
`BIGCHAINDB_CONFIG_PATH`
`BIGCHAINDB_BACKLOG_REASSIGN_DELAY`
-`BIGCHAINDB_CONSENSUS_PLUGIN`
+`BIGCHAINDB_LOG`
+`BIGCHAINDB_LOG_FILE`
+`BIGCHAINDB_LOG_ERROR_FILE`
+`BIGCHAINDB_LOG_LEVEL_CONSOLE`
+`BIGCHAINDB_LOG_LEVEL_LOGFILE`
+`BIGCHAINDB_LOG_DATEFMT_CONSOLE`
+`BIGCHAINDB_LOG_DATEFMT_LOGFILE`
+`BIGCHAINDB_LOG_FMT_CONSOLE`
+`BIGCHAINDB_LOG_FMT_LOGFILE`
+`BIGCHAINDB_LOG_GRANULAR_LEVELS`
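+
+For example, a minimal sketch of overriding a few of these variables in a shell session before starting BigchainDB (the values are illustrative):
+
+```text
+export BIGCHAINDB_DATABASE_BACKEND=mongodb
+export BIGCHAINDB_LOG_LEVEL_CONSOLE=debug
+bigchaindb start
+```
+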
The local config file is `$HOME/.bigchaindb` by default (a file which might not even exist), but you can tell BigchainDB to use a different file by using the `-c` command-line option, e.g. `bigchaindb -c path/to/config_file.json start` or using the `BIGCHAINDB_CONFIG_PATH` environment variable, e.g. `BIGCHAINDB_CONFIG_PATH=.my_bigchaindb_config bigchaindb start`.
@@ -76,9 +90,18 @@ Note how the keys in the list are separated by colons.
 ```
 
-## database.backend, database.host, database.port, database.name & database.replicaset
+## database.*
 
-The database backend to use (`rethinkdb` or `mongodb`) and its hostname, port and name. If the database backend is `mongodb`, then there's a fifth setting: the name of the replica set. If the database backend is `rethinkdb`, you *can* set the name of the replica set, but it won't be used for anything.
+The settings with names of the form `database.*` are for the database backend
+(currently either RethinkDB or MongoDB). They are:
+
+* `database.backend` is either `rethinkdb` or `mongodb`.
+* `database.host` is the hostname (FQDN) of the backend database.
+* `database.port` is self-explanatory.
+* `database.name` is a user-chosen name for the database inside RethinkDB or MongoDB, e.g. `bigchain`.
+* `database.replicaset` is only relevant if using MongoDB; it's the name of the MongoDB replica set, e.g. `bigchain-rs`.
+* `database.connection_timeout` is the maximum number of milliseconds that BigchainDB will wait before giving up on one attempt to connect to the database backend. Note: At the time of writing, this setting was only used by MongoDB; there was an open [issue to make RethinkDB use it as well](https://github.com/bigchaindb/bigchaindb/issues/1337).
+* `database.max_tries` is the maximum number of times that BigchainDB will try to establish a connection with the database backend. If 0, then it will try forever.
 
 **Example using environment variables**
 ```text
 export BIGCHAINDB_DATABASE_BACKEND=mongodb
 export BIGCHAINDB_DATABASE_HOST=localhost
 export BIGCHAINDB_DATABASE_PORT=27017
 export BIGCHAINDB_DATABASE_NAME=bigchain
 export BIGCHAINDB_DATABASE_REPLICASET=bigchain-rs
+export BIGCHAINDB_DATABASE_CONNECTION_TIMEOUT=5000
+export BIGCHAINDB_DATABASE_MAX_TRIES=3
 ```
 
 **Default values**
@@ -96,8 +121,10 @@ If (no environment variables were set and there's no local config file), or you
 "database": {
     "backend": "rethinkdb",
     "host": "localhost",
+    "port": 28015,
     "name": "bigchain",
-    "port": 28015
+    "connection_timeout": 5000,
+    "max_tries": 3
 }
 ```
@@ -106,24 +133,31 @@ If you used `bigchaindb -y configure mongodb` to create a default local config f
 "database": {
     "backend": "mongodb",
     "host": "localhost",
-    "name": "bigchain",
     "port": 27017,
-    "replicaset": "bigchain-rs"
+    "name": "bigchain",
+    "replicaset": "bigchain-rs",
+    "connection_timeout": 5000,
+    "max_tries": 3
 }
 ```
 
-## server.bind, server.workers & server.threads
+## server.bind, server.loglevel, server.workers & server.threads
 
-These settings are for the [Gunicorn HTTP server](http://gunicorn.org/), which is used to serve the [HTTP client-server API](../drivers-clients/http-client-server-api.html).
+These settings are for the [Gunicorn HTTP server](http://gunicorn.org/), which is used to serve the [HTTP client-server API](../http-client-server-api.html).
 
 `server.bind` is where to bind the Gunicorn HTTP server socket. It's a string. It can be any valid value for [Gunicorn's bind setting](http://docs.gunicorn.org/en/stable/settings.html#bind).
If you want to allow IPv4 connections from anyone, on port 9984, use '0.0.0.0:9984'. In a production setting, we recommend you use Gunicorn behind a reverse proxy server. If Gunicorn and the reverse proxy are running on the same machine, then use 'localhost:PORT' where PORT is _not_ 9984 (because the reverse proxy needs to listen on port 9984). Maybe use PORT=9983 in that case because we know 9983 isn't used. If Gunicorn and the reverse proxy are running on different machines, then use 'A.B.C.D:9984' where A.B.C.D is the IP address of the reverse proxy. There's [more information about deploying behind a reverse proxy in the Gunicorn documentation](http://docs.gunicorn.org/en/stable/deploy.html). (They call it a proxy.) +`server.loglevel` sets the log level of Gunicorn's Error log outputs. See +[Gunicorn's documentation](http://docs.gunicorn.org/en/latest/settings.html#loglevel) +for more information. + `server.workers` is [the number of worker processes](http://docs.gunicorn.org/en/stable/settings.html#workers) for handling requests. If `None` (the default), the value will be (cpu_count * 2 + 1). `server.threads` is [the number of threads-per-worker](http://docs.gunicorn.org/en/stable/settings.html#threads) for handling requests. If `None` (the default), the value will be (cpu_count * 2 + 1). The HTTP server will be able to handle `server.workers` * `server.threads` requests simultaneously. **Example using environment variables** ```text export BIGCHAINDB_SERVER_BIND=0.0.0.0:9984 +export BIGCHAINDB_SERVER_LOGLEVEL=debug export BIGCHAINDB_SERVER_WORKERS=5 export BIGCHAINDB_SERVER_THREADS=5 ``` @@ -132,6 +166,7 @@ export BIGCHAINDB_SERVER_THREADS=5 ```js "server": { "bind": "0.0.0.0:9984", + "loglevel": "debug", "workers": 5, "threads": 5 } @@ -141,11 +176,46 @@ export BIGCHAINDB_SERVER_THREADS=5 ```js "server": { "bind": "localhost:9984", + "loglevel": "info", "workers": null, "threads": null } ``` + +## wsserver.host and wsserver.port + +These settings are for the +[aiohttp server](https://aiohttp.readthedocs.io/en/stable/index.html), +which is used to serve the +[WebSocket Event Stream API](../websocket-event-stream-api.html). +`wsserver.host` is where to bind the aiohttp server socket and +`wsserver.port` is the corresponding port. +If you want to allow connections from anyone, on port 9985, +set `wsserver.host` to 0.0.0.0 and `wsserver.port` to 9985. + +**Example using environment variables** +```text +export BIGCHAINDB_WSSERVER_HOST=0.0.0.0 +export BIGCHAINDB_WSSERVER_PORT=9985 +``` + +**Example config file snippet** +```js +"wsserver": { + "host": "0.0.0.0", + "port": 65000 +} +``` + +**Default values (from a config file)** +```js +"wsserver": { + "host": "localhost", + "port": 9985 +} +``` + ## backlog_reassign_delay Specifies how long, in seconds, transactions can remain in the backlog before being reassigned. Long-waiting transactions must be reassigned because the assigned node may no longer be responsive. The default duration is 120 seconds. @@ -160,16 +230,237 @@ export BIGCHAINDB_BACKLOG_REASSIGN_DELAY=30 "backlog_reassign_delay": 120 ``` -## consensus_plugin -The [consensus plugin](../appendices/consensus.html) to use. +## log -**Example using an environment variable** -```text -export BIGCHAINDB_CONSENSUS_PLUGIN=default +The `log` key is expected to point to a mapping (set of key/value pairs) +holding the logging configuration. 
+
+**Example**:
+
+```
+{
+    "log": {
+        "file": "/var/log/bigchaindb.log",
+        "error_file": "/var/log/bigchaindb-errors.log",
+        "level_console": "info",
+        "level_logfile": "info",
+        "datefmt_console": "%Y-%m-%d %H:%M:%S",
+        "datefmt_logfile": "%Y-%m-%d %H:%M:%S",
+        "fmt_console": "%(asctime)s [%(levelname)s] (%(name)s) %(message)s",
+        "fmt_logfile": "%(asctime)s [%(levelname)s] (%(name)s) %(message)s",
+        "granular_levels": {
+            "bigchaindb.backend": "info",
+            "bigchaindb.core": "info"
+        }
+    }
+}
+```
+
+**Defaults to**:
+
+```
+{
+    "log": {
+        "file": "~/bigchaindb.log",
+        "error_file": "~/bigchaindb-errors.log",
+        "level_console": "info",
+        "level_logfile": "info",
+        "datefmt_console": "%Y-%m-%d %H:%M:%S",
+        "datefmt_logfile": "%Y-%m-%d %H:%M:%S",
+        "fmt_logfile": "[%(asctime)s] [%(levelname)s] (%(name)s) %(message)s (%(processName)-10s - pid: %(process)d)",
+        "fmt_console": "[%(asctime)s] [%(levelname)s] (%(name)s) %(message)s (%(processName)-10s - pid: %(process)d)",
+        "granular_levels": {}
+    }
+}
+```
+
+The next subsections explain each field of the `log` configuration.
+
+
+### log.file & log.error_file
+The full paths to the files where logs and error logs should be written.
+
+**Example**:
+
+```
+{
+    "log": {
+        "file": "/var/log/bigchaindb/bigchaindb.log",
+        "error_file": "/var/log/bigchaindb/bigchaindb-errors.log"
+    }
+}
+```
+
+**Defaults to**:
+
+ * `"~/bigchaindb.log"`
+ * `"~/bigchaindb-errors.log"`
+
+Please note that the user running `bigchaindb` must have write access to the
+locations.
+
+#### Log rotation
+
+Log files have a size limit of 200 MB and will be rotated up to five times.
+
+For example, if we consider the log file setting:
+
+```
+{
+    "log": {
+        "file": "~/bigchain.log"
+    }
+}
+```
+
+logs would always be written to `bigchain.log`. Each time the file
+`bigchain.log` reaches 200 MB, it would be closed and renamed
+`bigchain.log.1`. If `bigchain.log.1` and `bigchain.log.2` already exist, they
+would be renamed `bigchain.log.2` and `bigchain.log.3`. This pattern would be
+applied up to `bigchain.log.5`, after which `bigchain.log.5` would be
+overwritten by `bigchain.log.4`, thus ending the rotation cycle of whatever
+logs were in `bigchain.log.5`.
+
+
+### log.level_console
+The log level used to log to the console. The allowed values are the ones
+defined by [Python](https://docs.python.org/3.6/library/logging.html#levels),
+but case-insensitive for convenience's sake:
+
+```
+"critical", "error", "warning", "info", "debug", "notset"
+```
+
+**Example**:
+
+```
+{
+    "log": {
+        "level_console": "info"
+    }
+}
+```
+
+**Defaults to**: `"info"`.
+
+
+### log.level_logfile
+The log level used to log to the log file. The allowed values are the ones
+defined by [Python](https://docs.python.org/3.6/library/logging.html#levels),
+but case-insensitive for convenience's sake:
+
+```
+"critical", "error", "warning", "info", "debug", "notset"
+```
+
+**Example**:
+
+```
+{
+    "log": {
+        "level_logfile": "info"
+    }
+}
+```
+
+**Defaults to**: `"info"`.
+
+
+### log.datefmt_console
+The format string for the date/time portion of a message, when logged to the
+console.
+
+**Example**:
+
+```
+{
+    "log": {
+        "datefmt_console": "%x %X %Z"
+    }
+}
+```
+
+**Defaults to**: `"%Y-%m-%d %H:%M:%S"`.
+
+
+### log.datefmt_logfile
+The format string for the date/time portion of a message, when logged to a
+log file.
+
+**Example**:
+
+```
+{
+    "log": {
+        "datefmt_logfile": "%c %z"
+    }
+}
+```
+
+**Defaults to**: `"%Y-%m-%d %H:%M:%S"`.
+
+For more information on how to construct the format string, please consult the
+table in Python's documentation of
+[`time.strftime(format[, t])`](https://docs.python.org/3.6/library/time.html#time.strftime).
+
+
+### log.fmt_console
+A string used to format the log messages when logged to the console.
+
+**Example**:
+
+```
+{
+    "log": {
+        "fmt_console": "%(asctime)s [%(levelname)s] %(message)s %(process)d"
+    }
+}
+```
+
+**Defaults to**: `"[%(asctime)s] [%(levelname)s] (%(name)s) %(message)s (%(processName)-10s - pid: %(process)d)"`
+
+For more information on possible formatting options, please consult Python's
+documentation on
+[LogRecord attributes](https://docs.python.org/3.6/library/logging.html#logrecord-attributes).
+
+
+### log.fmt_logfile
+A string used to format the log messages when logged to a log file.
+
+**Example**:
+
+```
+{
+    "log": {
+        "fmt_logfile": "%(asctime)s [%(levelname)s] %(message)s %(process)d"
+    }
+}
+```
+
+**Defaults to**: `"[%(asctime)s] [%(levelname)s] (%(name)s) %(message)s (%(processName)-10s - pid: %(process)d)"`
+
+For more information on possible formatting options, please consult Python's
+documentation on
+[LogRecord attributes](https://docs.python.org/3.6/library/logging.html#logrecord-attributes).
+
+
+### log.granular_levels
+Log levels for BigchainDB's modules. This can be useful for controlling the
+log level of specific parts of the application. For example, if you wanted
+the logging of the `core.py` module to be more verbose, you would set the
+configuration shown in the example below.
+
+**Example**:
+
+```
+{
+    "log": {
+        "granular_levels": {
+            "bigchaindb.core": "debug"
+        }
+    }
+}
+```
+
+**Defaults to**: `{}` (an empty mapping, i.e. no module-specific overrides).
diff --git a/docs/server/source/topic-guides/index.rst b/docs/server/source/topic-guides/index.rst
deleted file mode 100644
index 9386fe87..00000000
--- a/docs/server/source/topic-guides/index.rst
+++ /dev/null
@@ -1,12 +0,0 @@
-Topic Guides
-============
-
-.. note::
-
-   Most of the Topic Guides have been moved over to `the root BigchainDB project docs `_.
-
-
-.. toctree::
-   :maxdepth: 1
-
-   models
diff --git a/docs/server/source/topic-guides/models.md b/docs/server/source/topic-guides/models.md
deleted file mode 100644
index 7f993feb..00000000
--- a/docs/server/source/topic-guides/models.md
+++ /dev/null
@@ -1,6 +0,0 @@
-# The Transaction, Block and Vote Models
-
-This page about transaction concepts and data models was getting too big, so it was split into smaller pages. It will be deleted eventually, so update your links.
Here's where you can find the new pages: - -* [Transaction Concepts](https://docs.bigchaindb.com/en/latest/transaction-concepts.html) -* [Data Models (all of them)](../data-models/index.html) diff --git a/docs/server/source/drivers-clients/websocket-event-stream-api.rst b/docs/server/source/websocket-event-stream-api.rst similarity index 90% rename from docs/server/source/drivers-clients/websocket-event-stream-api.rst rename to docs/server/source/websocket-event-stream-api.rst index 22effbc1..3ce86553 100644 --- a/docs/server/source/drivers-clients/websocket-event-stream-api.rst +++ b/docs/server/source/websocket-event-stream-api.rst @@ -2,7 +2,9 @@ The WebSocket Event Stream API ============================== .. important:: - This is currently scheduled to be implemented in BigchainDB Server 0.10. + The WebSocket Event Stream runs on a different port than the Web API. The + default port for the Web API is `9984`, while the one for the Event Stream + is `9985`. BigchainDB provides real-time event streams over the WebSocket protocol with the Event Stream API. @@ -28,7 +30,7 @@ response contains a ``streams_`` property in ``_links``:: { "_links": { - "streams_v1": "ws://example.com:9984/api/v1/streams/" + "streams_v1": "ws://example.com:9985/api/v1/streams/" } } @@ -80,9 +82,9 @@ the transaction's ID, associated asset ID, and containing block's ID. Example message:: { - "txid": "", - "assetid": "", - "blockid": "" + "tx_id": "", + "asset_id": "", + "block_id": "" } diff --git a/k8s/bigchaindb/bigchaindb-dep.yaml b/k8s/bigchaindb/bigchaindb-dep.yaml new file mode 100644 index 00000000..b8550249 --- /dev/null +++ b/k8s/bigchaindb/bigchaindb-dep.yaml @@ -0,0 +1,74 @@ +############################################################### +# This config file runs bigchaindb:0.10.1 as a k8s Deployment # +# and it connects to the mongodb backend running as a # +# separate pod # +############################################################### + +apiVersion: extensions/v1beta1 +kind: Deployment +metadata: + name: bdb-instance-0-dep +spec: + replicas: 1 + template: + metadata: + labels: + app: bdb-instance-0-dep + spec: + terminationGracePeriodSeconds: 10 + containers: + - name: bigchaindb + image: bigchaindb/bigchaindb:0.10.1 + imagePullPolicy: IfNotPresent + args: + - start + env: + - name: BIGCHAINDB_DATABASE_HOST + value: mdb-instance-0 + - name: BIGCHAINDB_DATABASE_PORT + value: "27017" + - name: BIGCHAINDB_DATABASE_REPLICASET + value: bigchain-rs + - name: BIGCHAINDB_DATABASE_BACKEND + value: mongodb + - name: BIGCHAINDB_DATABASE_NAME + value: bigchain + - name: BIGCHAINDB_SERVER_BIND + value: 0.0.0.0:9984 + - name: BIGCHAINDB_KEYPAIR_PUBLIC + value: "" + - name: BIGCHAINDB_KEYPAIR_PRIVATE + value: "" + - name: BIGCHAINDB_BACKLOG_REASSIGN_DELAY + value: "120" + - name: BIGCHAINDB_DATABASE_MAXTRIES + value: "3" + - name: BIGCHAINDB_DATABASE_CONNECTION_TIMEOUT + value: "120" + - name: BIGCHAINDB_LOG_LEVEL_CONSOLE + value: debug + # The following env var is not required for the bootstrap/first node + #- name: BIGCHAINDB_KEYRING + # value: "" + ports: + - containerPort: 9984 + hostPort: 9984 + name: bdb-port + protocol: TCP + resources: + limits: + cpu: 200m + memory: 768Mi + livenessProbe: + httpGet: + path: / + port: 9984 + initialDelaySeconds: 15 + timeoutSeconds: 10 + readinessProbe: + httpGet: + path: / + port: 9984 + initialDelaySeconds: 15 + timeoutSeconds: 10 + restartPolicy: Always diff --git a/k8s/bigchaindb/bigchaindb-svc.yaml b/k8s/bigchaindb/bigchaindb-svc.yaml new file mode 100644 index 
00000000..9927a92d
--- /dev/null
+++ b/k8s/bigchaindb/bigchaindb-svc.yaml
@@ -0,0 +1,16 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: bdb-instance-0
+  namespace: default
+  labels:
+    name: bdb-instance-0
+spec:
+  selector:
+    app: bdb-instance-0-dep
+  ports:
+  - port: 9984
+    targetPort: 9984
+    name: bdb-port
+  type: ClusterIP
+  clusterIP: None
diff --git a/k8s/configuration/config-map.yaml b/k8s/configuration/config-map.yaml
new file mode 100644
index 00000000..1c04dbf7
--- /dev/null
+++ b/k8s/configuration/config-map.yaml
@@ -0,0 +1,36 @@
+#######################################################
+# This YAML file describes a ConfigMap for the cluster #
+#######################################################
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: mdb-mon
+  namespace: default
+data:
+  api-key: ""
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: mdb-backup
+  namespace: default
+data:
+  api-key: ""
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: mdb-fqdn
+  namespace: default
+data:
+  fqdn: mdb-instance-0
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: mongodb-whitelist
+  namespace: default
+data:
+  allowed-hosts: "all"
+
diff --git a/k8s/mongodb-backup-agent/container/Dockerfile b/k8s/mongodb-backup-agent/container/Dockerfile
new file mode 100644
index 00000000..8407fb09
--- /dev/null
+++ b/k8s/mongodb-backup-agent/container/Dockerfile
@@ -0,0 +1,19 @@
+FROM ubuntu:xenial
+LABEL maintainer "dev@bigchaindb.com"
+ARG DEBIAN_FRONTEND=noninteractive
+ARG DEB_FILE=mongodb-mms-backup-agent_latest_amd64.ubuntu1604.deb
+ARG FILE_URL="https://cloud.mongodb.com/download/agent/backup/"$DEB_FILE
+WORKDIR /
+RUN apt update \
+    && apt -y upgrade \
+    && apt -y install --no-install-recommends curl ca-certificates logrotate \
+    libsasl2-2 \
+    && curl -OL $FILE_URL \
+    && dpkg -i $DEB_FILE \
+    && rm -f $DEB_FILE \
+    && apt -y purge curl \
+    && apt -y autoremove \
+    && apt clean
+COPY mongodb_backup_agent_entrypoint.bash /
+RUN chown -R mongodb-mms-agent:mongodb-mms-agent /etc/mongodb-mms/
+ENTRYPOINT ["/mongodb_backup_agent_entrypoint.bash"]
diff --git a/k8s/mongodb-backup-agent/container/docker_build_and_push.bash b/k8s/mongodb-backup-agent/container/docker_build_and_push.bash
new file mode 100755
index 00000000..e57e58a1
--- /dev/null
+++ b/k8s/mongodb-backup-agent/container/docker_build_and_push.bash
@@ -0,0 +1,5 @@
+#!/bin/bash
+
+docker build -t bigchaindb/mongodb-backup-agent:1.0 .
+
+docker push bigchaindb/mongodb-backup-agent:1.0
diff --git a/k8s/mongodb-backup-agent/container/mongodb_backup_agent_entrypoint.bash b/k8s/mongodb-backup-agent/container/mongodb_backup_agent_entrypoint.bash
new file mode 100755
index 00000000..3eb20633
--- /dev/null
+++ b/k8s/mongodb-backup-agent/container/mongodb_backup_agent_entrypoint.bash
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+set -euo pipefail
+
+MONGODB_BACKUP_CONF_FILE=/etc/mongodb-mms/backup-agent.config
+
+mms_api_key=`printenv MMS_API_KEY`
+
+if [[ -z "${mms_api_key}" ]]; then
+    echo "Invalid environment settings detected. Exiting!"
+    exit 1
+fi
+
+sed -i '/mmsApiKey/d' $MONGODB_BACKUP_CONF_FILE
+
+echo "mmsApiKey="${mms_api_key} >> $MONGODB_BACKUP_CONF_FILE
+
+echo "INFO: starting mdb backup..."
+exec mongodb-mms-backup-agent \ + -c $MONGODB_BACKUP_CONF_FILE diff --git a/k8s/mongodb-backup-agent/mongo-backup-dep.yaml b/k8s/mongodb-backup-agent/mongo-backup-dep.yaml new file mode 100644 index 00000000..b3d5a9ec --- /dev/null +++ b/k8s/mongodb-backup-agent/mongo-backup-dep.yaml @@ -0,0 +1,27 @@ +apiVersion: extensions/v1beta1 +kind: Deployment +metadata: + name: mdb-backup-instance-0-dep +spec: + replicas: 1 + template: + metadata: + labels: + app: mdb-backup-instance-0-dep + spec: + terminationGracePeriodSeconds: 10 + containers: + - name: mdb-backup + image: bigchaindb/mongodb-backup-agent:1.0 + imagePullPolicy: Always + env: + - name: MMS_API_KEY + valueFrom: + configMapKeyRef: + name: mdb-backup + key: api-key + resources: + limits: + cpu: 200m + memory: 768Mi + restartPolicy: Always diff --git a/k8s/mongodb-monitoring-agent/container/Dockerfile b/k8s/mongodb-monitoring-agent/container/Dockerfile new file mode 100644 index 00000000..ec6496d8 --- /dev/null +++ b/k8s/mongodb-monitoring-agent/container/Dockerfile @@ -0,0 +1,54 @@ +# Dockerfile for MongoDB Monitoring Agent +# Use it to create bigchaindb/mongodb-monitoring-agent +# on Docker Hub. + +# "Never install the Monitoring Agent on the same server as a data bearing mongod instance." +# More help: +# https://docs.cloudmanager.mongodb.com/tutorial/install-monitoring-agent-with-deb-package/ + +FROM ubuntu:xenial +LABEL maintainer "dev@bigchaindb.com" +# Using ARG, one can set DEBIAN_FRONTEND=noninteractive and others +# just for the duration of the build: +ARG DEBIAN_FRONTEND=noninteractive +ARG DEB_FILE=mongodb-mms-monitoring-agent_latest_amd64.ubuntu1604.deb +ARG FILE_URL="https://cloud.mongodb.com/download/agent/monitoring/"$DEB_FILE + +# Download the Monitoring Agent as a .deb package and install it +WORKDIR / +RUN apt update \ + && apt -y upgrade \ + && apt -y install --no-install-recommends curl ca-certificates logrotate \ + libsasl2-2 \ + && curl -OL $FILE_URL \ + && dpkg -i $DEB_FILE \ + && rm -f $DEB_FILE \ + && apt -y purge curl \ + && apt -y autoremove \ + && apt clean + +# The above installation puts a default config file in +# /etc/mongodb-mms/monitoring-agent.config +# It should contain a line like: "mmsApiKey=" +# i.e. with no value specified. +# We need to set that value to the "agent API key" value from Cloud Manager, +# but of course that value varies from user to user, +# so we can't hard-code it into the Docker image. + +# Kubernetes can set an MMS_API_KEY environment variable +# in the container +# (including from Secrets or ConfigMaps) +# An entrypoint bash script can then use the value of MMS_API_KEY +# to write the mmsApiKey value in the config file +# /etc/mongodb-mms/monitoring-agent.config +# before running the MongoDB Monitoring Agent. + +# The MongoDB Monitoring Agent has other +# config settings besides mmsApiKey, +# but it's the only one that *must* be set. 
See: +# https://docs.cloudmanager.mongodb.com/reference/monitoring-agent/ + +COPY mongodb_mon_agent_entrypoint.bash / +RUN chown -R mongodb-mms-agent:mongodb-mms-agent /etc/mongodb-mms/ +#USER mongodb-mms-agent - BUG(Krish) Uncomment after tests are complete +ENTRYPOINT ["/mongodb_mon_agent_entrypoint.bash"] diff --git a/k8s/mongodb-monitoring-agent/container/docker_build_and_push.bash b/k8s/mongodb-monitoring-agent/container/docker_build_and_push.bash new file mode 100755 index 00000000..d2219b08 --- /dev/null +++ b/k8s/mongodb-monitoring-agent/container/docker_build_and_push.bash @@ -0,0 +1,5 @@ +#!/bin/bash + +docker build -t bigchaindb/mongodb-monitoring-agent:1.0 . + +docker push bigchaindb/mongodb-monitoring-agent:1.0 diff --git a/k8s/mongodb-monitoring-agent/container/mongodb_mon_agent_entrypoint.bash b/k8s/mongodb-monitoring-agent/container/mongodb_mon_agent_entrypoint.bash new file mode 100755 index 00000000..6454c729 --- /dev/null +++ b/k8s/mongodb-monitoring-agent/container/mongodb_mon_agent_entrypoint.bash @@ -0,0 +1,30 @@ +#!/bin/bash + +set -euo pipefail +# -e Abort at the first failed line (i.e. if exit status is not 0) +# -u Abort when undefined variable is used +# -o pipefail (Bash-only) Piped commands return the status +# of the last failed command, rather than the status of the last command + +MONGODB_MON_CONF_FILE=/etc/mongodb-mms/monitoring-agent.config + +mms_api_key=`printenv MMS_API_KEY` + +if [[ -z "${mms_api_key}" ]]; then + echo "Invalid environment settings detected. Exiting!" + exit 1 +fi + +# Delete all lines containing "mmsApiKey" in the MongoDB Monitoring Agent +# config file /etc/mongodb-mms/monitoring-agent.config +sed -i '/mmsApiKey/d' $MONGODB_MON_CONF_FILE + +# Append a new line of the form +# mmsApiKey=value_of_MMS_API_KEY +echo "mmsApiKey="${mms_api_key} >> $MONGODB_MON_CONF_FILE + +# start mdb monitoring agent +echo "INFO: starting mdb monitor..." +exec mongodb-mms-monitoring-agent \ + --conf $MONGODB_MON_CONF_FILE \ + --loglevel debug diff --git a/k8s/mongodb-monitoring-agent/mongo-mon-dep.yaml b/k8s/mongodb-monitoring-agent/mongo-mon-dep.yaml new file mode 100644 index 00000000..98abe92b --- /dev/null +++ b/k8s/mongodb-monitoring-agent/mongo-mon-dep.yaml @@ -0,0 +1,38 @@ +############################################################ +# This config file defines a k8s Deployment for the # +# bigchaindb/mongodb-monitoring-agent:latest Docker image # +# # +# It connects to a MongoDB instance in a separate pod, # +# all remote MongoDB instances in the cluster, # +# and also to MongoDB Cloud Manager (an external service). # +# Notes: # +# MongoDB agents connect to Cloud Manager on port 443. 
# +############################################################ + +apiVersion: extensions/v1beta1 +kind: Deployment +metadata: + name: mdb-mon-instance-0-dep +spec: + replicas: 1 + template: + metadata: + labels: + app: mdb-mon-instance-0-dep + spec: + terminationGracePeriodSeconds: 10 + containers: + - name: mdb-mon + image: bigchaindb/mongodb-monitoring-agent:1.0 + imagePullPolicy: Always + env: + - name: MMS_API_KEY + valueFrom: + configMapKeyRef: + name: mdb-mon + key: api-key + resources: + limits: + cpu: 200m + memory: 768Mi + restartPolicy: Always diff --git a/k8s/mongodb/container/Dockerfile b/k8s/mongodb/container/Dockerfile new file mode 100644 index 00000000..e9667f95 --- /dev/null +++ b/k8s/mongodb/container/Dockerfile @@ -0,0 +1,12 @@ +FROM mongo:3.4.3 +LABEL maintainer "dev@bigchaindb.com" +WORKDIR / +RUN apt-get update \ + && apt-get -y upgrade \ + && apt-get autoremove \ + && apt-get clean +COPY mongod.conf.template /etc/mongod.conf.template +COPY mongod_entrypoint/mongod_entrypoint / +VOLUME /data/db /data/configdb +EXPOSE 27017 +ENTRYPOINT ["/mongod_entrypoint"] diff --git a/k8s/mongodb/container/Makefile b/k8s/mongodb/container/Makefile new file mode 100644 index 00000000..0a3779af --- /dev/null +++ b/k8s/mongodb/container/Makefile @@ -0,0 +1,51 @@ +# Targets: +# all: Cleans, formats src files, builds the code, builds the docker image +# clean: Removes the binary and docker image +# format: Formats the src files +# build: Builds the code +# docker: Builds the code and docker image +# push: Push the docker image to Docker hub + +GOCMD=go +GOVET=$(GOCMD) tool vet +GOINSTALL=$(GOCMD) install +GOFMT=gofmt -s -w + +DOCKER_IMAGE_NAME?=bigchaindb/mongodb +DOCKER_IMAGE_TAG?=3.4.3 + +PWD=$(shell pwd) +BINARY_PATH=$(PWD)/mongod_entrypoint/ +BINARY_NAME=mongod_entrypoint +MAIN_FILE = $(BINARY_PATH)/mongod_entrypoint.go +SRC_FILES = $(BINARY_PATH)/mongod_entrypoint.go + +.PHONY: all + +all: clean build docker + +clean: + @echo "removing any pre-built binary"; + -@rm $(BINARY_PATH)/$(BINARY_NAME); + @echo "remove any pre-built docker image"; + -@docker rmi $(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_TAG); + +format: + $(GOFMT) $(SRC_FILES) + +build: format + $(shell cd $(BINARY_PATH) && \ + export GOPATH="$(BINARY_PATH)" && \ + export GOBIN="$(BINARY_PATH)" && \ + CGO_ENABLED=0 GOOS=linux $(GOINSTALL) -ldflags "-s" -a -installsuffix cgo $(MAIN_FILE)) + +docker: build + docker build \ + -t $(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_TAG) .; + +vet: + $(GOVET) . + +push: + docker push \ + $(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_TAG); diff --git a/k8s/mongodb/container/README.md b/k8s/mongodb/container/README.md new file mode 100644 index 00000000..baad9f13 --- /dev/null +++ b/k8s/mongodb/container/README.md @@ -0,0 +1,88 @@ +## Custom MongoDB container for BigchainDB Backend + +### Need + +* MongoDB needs the hostname provided in the rs.initiate() command to be + resolvable through the hosts file locally. +* In the future, with the introduction of TLS for inter-cluster MongoDB + communications, we will need a way to specify detailed configuration. +* We also need a way to overwrite certain parameters to suit our use case. + + +### Step 1: Build the Latest Container + +`make` from the root of this project. 
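+
+For reference, the Makefile shown above chains the individual targets, so the
+common invocations are just the following (a quick sketch, assuming you run
+them from the `k8s/mongodb/container` folder that holds the Makefile):
+
+```
+make         # clean, format and build the Go entrypoint, then the image
+make docker  # rebuild only the binary and the Docker image
+make push    # push bigchaindb/mongodb:3.4.3 to Docker Hub
+```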
+
+
+### Step 2: Run the Container
+
+```
+docker run \
+--name=mdb1 \
+--publish=<host-port>:<container-port> \
+--rm=true \
+bigchaindb/mongodb \
+--replica-set-name <replica-set-name> \
+--fqdn <fqdn> \
+--port <port>
+```
+
+### Step 3: Initialize the Replica Set
+
+Log in to one of the MongoDB containers, say mdb1:
+
+`docker exec -it mdb1 bash`
+
+Start the `mongo` shell:
+
+`mongo --port 27017`
+
+
+Run the rs.initiate() command:
+```
+rs.initiate({
+  _id : "<replica-set-name>",
+  members: [ {
+    _id : 0,
+    host : "<fqdn>:<port>"
+  } ]
+})
+```
+
+For example:
+
+```
+rs.initiate({ _id : "test-repl-set", members: [ { _id : 0, host :
+"mdb-instance-0.westeurope.cloudapp.azure.com:27017" } ] })
+```
+
+You should also see changes in the mongo shell prompt from `>` to
+`test-repl-set:OTHER>` to `test-repl-set:SECONDARY>` and finally
+`test-repl-set:PRIMARY>`.
+If this instance is not the primary, you can use the `rs.status()` command to
+find out which one is.
+
+
+### Step 4: Add Members to the Replica Set
+
+We can only add members to a replica set from the PRIMARY instance.
+Log in to the PRIMARY and open a `mongo` shell.
+
+Run the rs.add() command with the FQDN and port number of the other
+containers/instances:
+```
+rs.add("<fqdn>:<port>")
+```
+
+For example:
+
+Add mdb2 to the replica set from mdb1:
+```
+rs.add("bdb-cluster-1.northeurope.cloudapp.azure.com:27017")
+```
+
+Add mdb3 to the replica set from mdb1:
+```
+rs.add("bdb-cluster-2.northeurope.cloudapp.azure.com:27017")
+```
+
diff --git a/k8s/mongodb/container/mongod.conf.template b/k8s/mongodb/container/mongod.conf.template
new file mode 100644
index 00000000..28e74acf
--- /dev/null
+++ b/k8s/mongodb/container/mongod.conf.template
@@ -0,0 +1,89 @@
+# mongod.conf
+
+# for documentation of all options, see:
+# http://docs.mongodb.org/manual/reference/configuration-options/
+
+# where to write logging data.
+systemLog:
+  verbosity: 0
+  #TODO traceAllExceptions: true
+  timeStampFormat: iso8601-utc
+  component:
+    accessControl:
+      verbosity: 0
+    command:
+      verbosity: 0
+    control:
+      verbosity: 0
+    ftdc:
+      verbosity: 0
+    geo:
+      verbosity: 0
+    index:
+      verbosity: 0
+    network:
+      verbosity: 0
+    query:
+      verbosity: 0
+    replication:
+      verbosity: 0
+    sharding:
+      verbosity: 0
+    storage:
+      verbosity: 0
+      journal:
+        verbosity: 0
+    write:
+      verbosity: 0
+
+processManagement:
+  fork: false
+  pidFilePath: /tmp/mongod.pid
+
+net:
+  port: PORT
+  bindIp: 0.0.0.0
+  maxIncomingConnections: 8192
+  wireObjectCheck: false
+  unixDomainSocket:
+    enabled: false
+    pathPrefix: /tmp
+    filePermissions: 0700
+  http:
+    enabled: false
+  compression:
+    compressors: snappy
+  #ssl: TODO
+
+#security: TODO
+
+#setParameter:
+  #notablescan: 1 TODO
+  #logUserIds: 1 TODO
+
+storage:
+  dbPath: /data/db
+  indexBuildRetry: true
+  journal:
+    enabled: true
+    commitIntervalMs: 100
+  directoryPerDB: true
+  engine: wiredTiger
+  wiredTiger:
+    engineConfig:
+      journalCompressor: snappy
+    collectionConfig:
+      blockCompressor: snappy
+    indexConfig:
+      prefixCompression: true # TODO false may affect performance?
+ +operationProfiling: + mode: slowOp + slowOpThresholdMs: 100 + +replication: + replSetName: REPLICA_SET_NAME + enableMajorityReadConcern: true + +#sharding: + diff --git a/k8s/mongodb/container/mongod_entrypoint/mongod_entrypoint.go b/k8s/mongodb/container/mongod_entrypoint/mongod_entrypoint.go new file mode 100644 index 00000000..57b48974 --- /dev/null +++ b/k8s/mongodb/container/mongod_entrypoint/mongod_entrypoint.go @@ -0,0 +1,154 @@ +package main + +import ( + "bytes" + "errors" + "flag" + "fmt" + "io/ioutil" + "log" + "net" + "os" + "regexp" + "syscall" +) + +const ( + mongoConfFilePath string = "/etc/mongod.conf" + mongoConfTemplateFilePath string = "/etc/mongod.conf.template" + hostsFilePath string = "/etc/hosts" +) + +var ( + // Use the same entrypoint as the mongo:3.4.2 image; just supply it with + // the mongod conf file with custom params + mongoStartCmd []string = []string{"/entrypoint.sh", "mongod", "--config", + mongoConfFilePath} +) + +// context struct stores the user input and the constraints for the specified +// input. It also stores the keyword that needs to be replaced in the template +// files. +type context struct { + cliInput string + templateKeyword string + regex string +} + +// sanity function takes the pre-defined constraints and the user inputs as +// arguments and validates user input based on regex matching +func sanity(input map[string]*context, fqdn, ip string) error { + var format *regexp.Regexp + for _, ctx := range input { + format = regexp.MustCompile(ctx.regex) + if format.MatchString(ctx.cliInput) == false { + return errors.New(fmt.Sprintf( + "Invalid value: '%s' for '%s'. Can be '%s'", + ctx.cliInput, + ctx.templateKeyword, + ctx.regex)) + } + } + + format = regexp.MustCompile(`[a-z0-9-.]+`) + if format.MatchString(fqdn) == false { + return errors.New(fmt.Sprintf( + "Invalid value: '%s' for FQDN. Can be '%s'", + fqdn, + format)) + } + + if net.ParseIP(ip) == nil { + return errors.New(fmt.Sprintf( + "Invalid value: '%s' for IPv4. Can be a.b.c.d", + ip)) + } + + return nil +} + +// createFile function takes the pre-defined keywords, user inputs, the +// template file path and the new file path location as parameters, and +// creates a new file at file path with all the keywords replaced by inputs. 
+func createFile(input map[string]*context,
+	template string, conf string) error {
+	// read the template
+	contents, err := ioutil.ReadFile(template)
+	if err != nil {
+		return err
+	}
+	// replace
+	for _, ctx := range input {
+		contents = bytes.Replace(contents, []byte(ctx.templateKeyword),
+			[]byte(ctx.cliInput), -1)
+	}
+	// write
+	err = ioutil.WriteFile(conf, contents, 0644)
+	if err != nil {
+		return err
+	}
+	return nil
+}
+
+// updateHostsFile takes the FQDN supplied as input to the container and adds
+// an entry to /etc/hosts
+func updateHostsFile(ip, fqdn string) error {
+	fileHandle, err := os.OpenFile(hostsFilePath, os.O_APPEND|os.O_WRONLY,
+		os.ModeAppend)
+	if err != nil {
+		return err
+	}
+	defer fileHandle.Close()
+	// append
+	_, err = fileHandle.WriteString(fmt.Sprintf("\n%s %s\n", ip, fqdn))
+	if err != nil {
+		return err
+	}
+	return nil
+}
+
+func main() {
+	var fqdn, ip string
+	input := make(map[string]*context)
+
+	input["replica-set-name"] = &context{}
+	input["replica-set-name"].regex = `[a-z]+`
+	input["replica-set-name"].templateKeyword = "REPLICA_SET_NAME"
+	flag.StringVar(&input["replica-set-name"].cliInput,
+		"replica-set-name",
+		"",
+		"replica set name")
+
+	input["port"] = &context{}
+	input["port"].regex = `[0-9]{4,5}`
+	input["port"].templateKeyword = "PORT"
+	flag.StringVar(&input["port"].cliInput,
+		"port",
+		"",
+		"mongodb port number")
+
+	flag.StringVar(&fqdn, "fqdn", "", "FQDN of the MongoDB instance")
+	flag.StringVar(&ip, "ip", "", "IPv4 address of the container")
+
+	flag.Parse()
+	err := sanity(input, fqdn, ip)
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	err = createFile(input, mongoConfTemplateFilePath, mongoConfFilePath)
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	err = updateHostsFile(ip, fqdn)
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	fmt.Printf("Starting Mongod....")
+	err = syscall.Exec(mongoStartCmd[0], mongoStartCmd[0:], os.Environ())
+	if err != nil {
+		panic(err)
+	}
+}
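To make the entrypoint's behavior concrete, here is a rough shell equivalent
of what the Go program above does, using hypothetical flag values (a sketch
for illustration only; the real container runs the compiled binary):

```
# given: --replica-set-name=bigchainrs --port=27017 \
#        --fqdn=mdb0.example.com --ip=10.0.0.5
# 1. fill in the keywords in the mongod config template
sed -e 's|REPLICA_SET_NAME|bigchainrs|g' \
    -e 's|PORT|27017|g' \
    /etc/mongod.conf.template > /etc/mongod.conf
# 2. make the FQDN resolvable locally, as rs.initiate() requires
echo "10.0.0.5 mdb0.example.com" >> /etc/hosts
# 3. hand over to the stock mongo image entrypoint
exec /entrypoint.sh mongod --config /etc/mongod.conf
```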
diff --git a/k8s/mongodb/mongo-pvc.yaml b/k8s/mongodb/mongo-pvc.yaml
new file mode 100644
index 00000000..da257527
--- /dev/null
+++ b/k8s/mongodb/mongo-pvc.yaml
@@ -0,0 +1,35 @@
+###########################################################
+# This YAML section describes a k8s pvc for mongodb dbPath #
+###########################################################
+kind: PersistentVolumeClaim
+apiVersion: v1
+metadata:
+  name: mongo-db-claim
+  annotations:
+    volume.beta.kubernetes.io/storage-class: slow-db
+spec:
+  accessModes:
+    - ReadWriteOnce
+  # FIXME(Uncomment when ACS supports this!)
+  # persistentVolumeReclaimPolicy: Retain
+  resources:
+    requests:
+      storage: 20Gi
+---
+#############################################################
+# This YAML section describes a k8s pvc for mongodb configDB #
+#############################################################
+kind: PersistentVolumeClaim
+apiVersion: v1
+metadata:
+  name: mongo-configdb-claim
+  annotations:
+    volume.beta.kubernetes.io/storage-class: slow-configdb
+spec:
+  accessModes:
+    - ReadWriteOnce
+  # FIXME(Uncomment when ACS supports this!)
+  # persistentVolumeReclaimPolicy: Retain
+  resources:
+    requests:
+      storage: 1Gi
diff --git a/k8s/mongodb/mongo-sc.yaml b/k8s/mongodb/mongo-sc.yaml
new file mode 100644
index 00000000..2f291ffe
--- /dev/null
+++ b/k8s/mongodb/mongo-sc.yaml
@@ -0,0 +1,23 @@
+####################################################################
+# This YAML section describes a StorageClass for the mongodb dbPath #
+####################################################################
+kind: StorageClass
+apiVersion: storage.k8s.io/v1beta1
+metadata:
+  name: slow-db
+provisioner: kubernetes.io/azure-disk
+parameters:
+  skuName: Standard_LRS
+  location: westeurope
+---
+######################################################################
+# This YAML section describes a StorageClass for the mongodb configDB #
+######################################################################
+kind: StorageClass
+apiVersion: storage.k8s.io/v1beta1
+metadata:
+  name: slow-configdb
+provisioner: kubernetes.io/azure-disk
+parameters:
+  skuName: Standard_LRS
+  location: westeurope
diff --git a/k8s/mongodb/mongo-ss.yaml b/k8s/mongodb/mongo-ss.yaml
new file mode 100644
index 00000000..2f180929
--- /dev/null
+++ b/k8s/mongodb/mongo-ss.yaml
@@ -0,0 +1,73 @@
+########################################################################
+# This YAML file describes a StatefulSet with a service for running and #
+# exposing a MongoDB instance.                                          #
+# It depends on the configdb and db k8s pvc.                            #
+########################################################################
+
+apiVersion: apps/v1beta1
+kind: StatefulSet
+metadata:
+  name: mdb-instance-0-ss
+  namespace: default
+spec:
+  serviceName: mdb-instance-0
+  replicas: 1
+  template:
+    metadata:
+      name: mdb-instance-0-ss
+      labels:
+        app: mdb-instance-0-ss
+    spec:
+      terminationGracePeriodSeconds: 10
+      containers:
+      - name: mongodb
+        image: bigchaindb/mongodb:3.4.3
+        imagePullPolicy: IfNotPresent
+        env:
+        - name: MONGODB_FQDN
+          valueFrom:
+            configMapKeyRef:
+              name: mdb-fqdn
+              key: fqdn
+        - name: MONGODB_POD_IP
+          valueFrom:
+            fieldRef:
+              fieldPath: status.podIP
+        args:
+        - --replica-set-name=bigchain-rs
+        - --fqdn=$(MONGODB_FQDN)
+        - --port=27017
+        - --ip=$(MONGODB_POD_IP)
+        securityContext:
+          capabilities:
+            add:
+            - FOWNER
+        ports:
+        - containerPort: 27017
+          hostPort: 27017
+          name: mdb-port
+          protocol: TCP
+        volumeMounts:
+        - name: mdb-db
+          mountPath: /data/db
+        - name: mdb-configdb
+          mountPath: /data/configdb
+        resources:
+          limits:
+            cpu: 200m
+            memory: 768Mi
+        livenessProbe:
+          tcpSocket:
+            port: mdb-port
+          successThreshold: 1
+          failureThreshold: 3
+          periodSeconds: 15
+          timeoutSeconds: 1
+      restartPolicy: Always
+      volumes:
+      - name: mdb-db
+        persistentVolumeClaim:
+          claimName: mongo-db-claim
+      - name: mdb-configdb
+        persistentVolumeClaim:
+          claimName: mongo-configdb-claim
diff --git a/k8s/mongodb/mongo-svc.yaml b/k8s/mongodb/mongo-svc.yaml
new file mode 100644
index 00000000..2c81797a
--- /dev/null
+++ b/k8s/mongodb/mongo-svc.yaml
@@ -0,0 +1,16 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: mdb-instance-0
+  namespace: default
+  labels:
+    name: mdb-instance-0
+spec:
+  selector:
+    app: mdb-instance-0-ss
+  ports:
+  - port: 27017
+    targetPort: 27017
+    name: mdb-port
+  type: ClusterIP
+  clusterIP: None
diff --git a/k8s/nginx-3scale/nginx-3scale-dep.yaml b/k8s/nginx-3scale/nginx-3scale-dep.yaml
new file mode 100644
index 00000000..8b1fa673
--- /dev/null
+++ b/k8s/nginx-3scale/nginx-3scale-dep.yaml
@@ -0,0 +1,98 @@
+###############################################################
+# This config
file runs nginx as a k8s deployment and exposes # +# it using an external load balancer. # +# This deployment is used as a front end to both BigchainDB # +# and MongoDB. # +############################################################### + +apiVersion: extensions/v1beta1 +kind: Deployment +metadata: + name: ngx-instance-0-dep +spec: + replicas: 1 + template: + metadata: + labels: + app: ngx-instance-0-dep + spec: + terminationGracePeriodSeconds: 10 + containers: + - name: nginx-3scale + image: bigchaindb/nginx_3scale:1.0 + # TODO(Krish): Change later to IfNotPresent + imagePullPolicy: Always + env: + - name: MONGODB_FRONTEND_PORT + value: $(NGX_INSTANCE_0_SERVICE_PORT_NGX_PUBLIC_MDB_PORT) + - name: MONGODB_BACKEND_HOST + # NGINX requires FQDN to resolve names + value: mdb-instance-0.default.svc.cluster.local + - name: MONGODB_BACKEND_PORT + value: "27017" + - name: BIGCHAINDB_FRONTEND_PORT + value: $(NGX_INSTANCE_0_SERVICE_PORT_NGX_PUBLIC_BDB_PORT) + - name: BIGCHAINDB_BACKEND_HOST + # NGINX requires FQDN to resolve names + value: bdb-instance-0.default.svc.cluster.local + - name: BIGCHAINDB_BACKEND_PORT + value: "9984" + - name: MONGODB_WHITELIST + valueFrom: + configMapKeyRef: + name: mongodb-whitelist + key: allowed-hosts + - name: DNS_SERVER + value: "10.0.0.10" + - name: NGINX_HEALTH_CHECK_PORT + value: "8888" + # TODO(Krish): use secrets for sensitive info + - name: THREESCALE_SECRET_TOKEN + value: "" + - name: THREESCALE_SERVICE_ID + value: "" + - name: THREESCALE_VERSION_HEADER + value: "" + - name: THREESCALE_PROVIDER_KEY + value: "" + - name: THREESCALE_FRONTEND_API_DNS_NAME + value: "" + - name: THREESCALE_UPSTREAM_API_PORT + value: "" + ports: + - containerPort: 27017 + hostPort: 27017 + name: public-mdb-port + protocol: TCP + - containerPort: 443 + hostPort: 443 + name: public-bdb-port + protocol: TCP + - containerPort: 8888 + hostPort: 8888 + name: health-check + protocol: TCP + - containerPort: 8080 + hostPort: 8080 + name: public-api-port + protocol: TCP + volumeMounts: + - name: https + mountPath: /usr/local/openresty/nginx/conf/ssl/ + readOnly: true + resources: + limits: + cpu: 200m + memory: 768Mi + livenessProbe: + httpGet: + path: / + port: 8888 + initialDelaySeconds: 15 + timeoutSeconds: 10 + restartPolicy: Always + volumes: + - name: https + secret: + secretName: certs + defaultMode: 0400 diff --git a/k8s/nginx-3scale/nginx-3scale-secret.yaml b/k8s/nginx-3scale/nginx-3scale-secret.yaml new file mode 100644 index 00000000..8f725313 --- /dev/null +++ b/k8s/nginx-3scale/nginx-3scale-secret.yaml @@ -0,0 +1,13 @@ +# Certificate data should be base64 encoded before embedding them here by using +# `cat cert.pem | base64 -w 0 > cert.pem.b64` and then copy the resulting +# value here. Same goes for cert.key. 
+# Ref: https://kubernetes.io/docs/concepts/configuration/secret/
+
+apiVersion: v1
+kind: Secret
+metadata:
+  name: certs
+type: Opaque
+data:
+  cert.pem:
+  cert.key:
diff --git a/k8s/nginx-3scale/nginx-3scale-svc.yaml b/k8s/nginx-3scale/nginx-3scale-svc.yaml
new file mode 100644
index 00000000..db212222
--- /dev/null
+++ b/k8s/nginx-3scale/nginx-3scale-svc.yaml
@@ -0,0 +1,29 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: ngx-instance-0
+  namespace: default
+  labels:
+    name: ngx-instance-0
+  annotations:
+    # NOTE: the following annotation is a beta feature and
+    # only available in GCE/GKE and Azure as of now
+    # Ref: https://kubernetes.io/docs/tutorials/services/source-ip/
+    service.beta.kubernetes.io/external-traffic: OnlyLocal
+spec:
+  selector:
+    app: ngx-instance-0-dep
+  ports:
+  - port: 443
+    targetPort: 443
+    name: ngx-public-bdb-port
+    protocol: TCP
+  - port: 8080
+    targetPort: 8080
+    name: ngx-public-3scale-port
+    protocol: TCP
+  - port: 27017
+    targetPort: 27017
+    name: ngx-public-mdb-port
+    protocol: TCP
+  type: LoadBalancer
diff --git a/k8s/nginx/container/Dockerfile b/k8s/nginx/container/Dockerfile
new file mode 100644
index 00000000..c6c4dd3f
--- /dev/null
+++ b/k8s/nginx/container/Dockerfile
@@ -0,0 +1,11 @@
+FROM nginx:1.11.10
+LABEL maintainer "dev@bigchaindb.com"
+WORKDIR /
+RUN apt-get update \
+    && apt-get -y upgrade \
+    && apt-get autoremove \
+    && apt-get clean
+COPY nginx.conf.template /etc/nginx/nginx.conf
+COPY nginx_entrypoint.bash /
+EXPOSE 80 443 27017
+ENTRYPOINT ["/nginx_entrypoint.bash"]
diff --git a/k8s/nginx/container/README.md b/k8s/nginx/container/README.md
new file mode 100644
index 00000000..30f42bfe
--- /dev/null
+++ b/k8s/nginx/container/README.md
@@ -0,0 +1,69 @@
+## Custom Nginx container for a Node
+
+### Need
+
+* Since BigchainDB and MongoDB both need to expose ports to the outside
+  world (inter- and intra-cluster), we need a basic DDoS mitigation
+  strategy to ensure that we can provide proper uptime and security for
+  these core services.
+
+* We can have a proxy like nginx/haproxy in every node that listens to
+  global connections and applies a cluster-level entry policy.
+
+### Implementation
+* For MongoDB cluster communication, we will use nginx with an environment
+  variable specifying a ":"-separated list of IPs in the whitelist. This list
+  contains the IPs of existing instances in the MongoDB replica set, so as to
+  allow connections from the whitelist and avoid a DDoS (see the sketch
+  below).
+
+* For BigchainDB connections, nginx needs to have rules to throttle
+  connections that are using resources over a threshold.
+
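+To make the whitelist mechanism concrete: the entrypoint script (shown
+further down in this patch) expands the ":"-separated `MONGODB_WHITELIST`
+value into one nginx `allow` directive per CIDR block. A sketch of that
+expansion, with hypothetical values:
+
+```
+# what the entrypoint effectively does with MONGODB_WHITELIST
+hosts=$(echo "192.168.0.0/16:10.0.2.0/24" | tr ":" "\n")
+for host in $hosts; do
+    echo "allow ${host};"
+done
+# prints:
+#   allow 192.168.0.0/16;
+#   allow 10.0.2.0/24;
+```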
" \ +--env "DNS_SERVER=" \ +--name=ngx \ +--publish=: \ +--publish=: \ +--rm=true \ +bigchaindb/nginx +``` + +For example: +``` +docker run \ +--env "MONGODB_FRONTEND_PORT=17017" \ +--env "MONGODB_BACKEND_HOST=localhost" \ +--env "MONGODB_BACKEND_PORT=27017" \ +--env "BIGCHAINDB_FRONTEND_PORT=80" \ +--env "BIGCHAINDB_BACKEND_HOST=localhost" \ +--env "BIGCHAINDB_BACKEND_PORT=9984" \ +--env "MONGODB_WHITELIST=192.168.0.0/16:10.0.2.0/24" \ +--name=ngx \ +--publish=80:80 \ +--publish=17017:17017 \ +--rm=true \ +bigchaindb/nginx +``` + diff --git a/k8s/nginx/container/nginx.conf.template b/k8s/nginx/container/nginx.conf.template new file mode 100644 index 00000000..6167dceb --- /dev/null +++ b/k8s/nginx/container/nginx.conf.template @@ -0,0 +1,107 @@ +worker_processes 2; +daemon off; +user nobody nogroup; +pid /tmp/nginx.pid; +error_log /etc/nginx/nginx.error.log; + +events { + worker_connections 256; + accept_mutex on; + use epoll; +} + +http { + server_names_hash_bucket_size 128; + access_log /etc/nginx/nginx.access.log combined buffer=16k flush=5s; + + # allow 10 req/sec from the same IP address, and store the counters in a + # `zone` or shared memory location tagged as 'one'. + limit_req_zone $binary_remote_addr zone=one:10m rate=10r/s; + + # enable logging when requests are being throttled + limit_req_log_level notice; + + # the http status code to return to the client when throttling; + # 429 is for TooManyRequests, + # ref. RFC 6585 + limit_req_status 429; + + resolver DNS_SERVER valid=20s; + + map $remote_addr $bdb_backend { + default BIGCHAINDB_BACKEND_HOST; + } + + server { + listen BIGCHAINDB_FRONTEND_PORT; + # server_name "FRONTEND_DNS_NAME"; + underscores_in_headers on; + + # max client request body size: avg transaction size + client_max_body_size 15k; + + # keepalive connection settings + keepalive_timeout 20s; + + # `slowloris` attack mitigation settings + client_body_timeout 10s; + client_header_timeout 10s; + + location / { + proxy_ignore_client_abort on; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header Host $http_host; + proxy_redirect off; + + # TODO proxy_set_header X-Forwarded-Proto https; + + # limit requests from the same client, allow `burst` to 20 r/s, + # `nodelay` or drop connection immediately in case it exceeds this + # threshold. 
+ limit_req zone=one burst=20 nodelay; + + proxy_pass http://$bdb_backend:BIGCHAINDB_BACKEND_PORT; + } + } +} + +# NGINX stream block for TCP and UDP proxies +stream { + log_format mdb_log '[$time_iso8601] $realip_remote_addr $remote_addr ' + '$proxy_protocol_addr $proxy_protocol_port ' + '$protocol $status $session_time $bytes_sent ' + '$bytes_received "$upstream_addr" "$upstream_bytes_sent" ' + '"$upstream_bytes_received" "$upstream_connect_time" '; + + access_log /etc/nginx/nginx.stream.access.log mdb_log buffer=16k flush=5s; + + # define a zone 'two' of size 10 megabytes to store the counters + # that hold number of TCP connections from a specific IP address + limit_conn_zone $binary_remote_addr zone=two:10m; + + # enable logging when connections are being throttled + limit_conn_log_level notice; + + resolver DNS_SERVER valid=20s; + + map $remote_addr $mdb_backend { + default MONGODB_BACKEND_HOST; + } + + server { + listen MONGODB_FRONTEND_PORT so_keepalive=10m:1m:5; + preread_timeout 30s; + tcp_nodelay on; + + # whitelist + #MONGODB_WHITELIST + allow all; + # deny access to everyone else + deny all; + + # allow 16 connections from the same IP address + limit_conn two 16; + + proxy_pass $mdb_backend:MONGODB_BACKEND_PORT; + } +} diff --git a/k8s/nginx/container/nginx_entrypoint.bash b/k8s/nginx/container/nginx_entrypoint.bash new file mode 100755 index 00000000..e40d89f4 --- /dev/null +++ b/k8s/nginx/container/nginx_entrypoint.bash @@ -0,0 +1,47 @@ +#!/bin/bash +set -euo pipefail + +mongo_frontend_port=`printenv MONGODB_FRONTEND_PORT` +mongo_backend_host=`printenv MONGODB_BACKEND_HOST` +mongo_backend_port=`printenv MONGODB_BACKEND_PORT` +bdb_frontend_port=`printenv BIGCHAINDB_FRONTEND_PORT` +bdb_backend_host=`printenv BIGCHAINDB_BACKEND_HOST` +bdb_backend_port=`printenv BIGCHAINDB_BACKEND_PORT` +mongo_whitelist=`printenv MONGODB_WHITELIST` +dns_server=`printenv DNS_SERVER` + +# sanity checks +if [[ -z "${mongo_frontend_port}" || \ + -z "${mongo_backend_host}" || \ + -z "${mongo_backend_port}" || \ + -z "${bdb_frontend_port}" || \ + -z "${bdb_backend_host}" || \ + -z "${bdb_backend_port}" || \ + -z "${dns_server}" ]] ; then + echo "Invalid environment settings detected. Exiting!" + exit 1 +fi + +NGINX_CONF_FILE=/etc/nginx/nginx.conf + +# configure the nginx.conf file with env variables +sed -i "s|MONGODB_FRONTEND_PORT|${mongo_frontend_port}|g" $NGINX_CONF_FILE +sed -i "s|MONGODB_BACKEND_HOST|${mongo_backend_host}|g" $NGINX_CONF_FILE +sed -i "s|MONGODB_BACKEND_PORT|${mongo_backend_port}|g" $NGINX_CONF_FILE +sed -i "s|BIGCHAINDB_FRONTEND_PORT|${bdb_frontend_port}|g" $NGINX_CONF_FILE +sed -i "s|BIGCHAINDB_BACKEND_HOST|${bdb_backend_host}|g" $NGINX_CONF_FILE +sed -i "s|BIGCHAINDB_BACKEND_PORT|${bdb_backend_port}|g" $NGINX_CONF_FILE +sed -i "s|DNS_SERVER|${dns_server}|g" $NGINX_CONF_FILE + +# populate the whitelist in the conf file as per MONGODB_WHITELIST env var +hosts=$(echo ${mongo_whitelist} | tr ":" "\n") +for host in $hosts; do + sed -i "s|MONGODB_WHITELIST|allow ${host};\n MONGODB_WHITELIST|g" $NGINX_CONF_FILE +done + +# remove the MONGODB_WHITELIST marker string from template +sed -i "s|MONGODB_WHITELIST||g" $NGINX_CONF_FILE + +# start nginx +echo "INFO: starting nginx..." 
+exec nginx -c /etc/nginx/nginx.conf diff --git a/k8s/nginx/nginx-dep.yaml b/k8s/nginx/nginx-dep.yaml new file mode 100644 index 00000000..684ae552 --- /dev/null +++ b/k8s/nginx/nginx-dep.yaml @@ -0,0 +1,61 @@ +############################################################### +# This config file runs nginx as a k8s deployment and exposes # +# it using an external load balancer. # +# This deployment is used as a front end to both BigchainDB # +# and MongoDB. # +############################################################### + +apiVersion: extensions/v1beta1 +kind: Deployment +metadata: + name: ngx-instance-0-dep +spec: + replicas: 1 + template: + metadata: + labels: + app: ngx-instance-0-dep + spec: + terminationGracePeriodSeconds: 10 + containers: + - name: nginx + image: bigchaindb/nginx:1.0 + imagePullPolicy: IfNotPresent + env: + - name: MONGODB_FRONTEND_PORT + value: $(NGX_INSTANCE_0_SERVICE_PORT_NGX_PUBLIC_MDB_PORT) + - name: MONGODB_BACKEND_HOST + # NGINX requires FQDN to resolve names + value: mdb-instance-0.default.svc.cluster.local + - name: MONGODB_BACKEND_PORT + value: "27017" + - name: BIGCHAINDB_FRONTEND_PORT + value: $(NGX_INSTANCE_0_SERVICE_PORT_NGX_PUBLIC_BDB_PORT) + - name: BIGCHAINDB_BACKEND_HOST + # NGINX requires FQDN to resolve names + value: bdb-instance-0.default.svc.cluster.local + - name: BIGCHAINDB_BACKEND_PORT + value: "9984" + - name: DNS_SERVER + value: "10.0.0.10" + - name: MONGODB_WHITELIST + valueFrom: + configMapKeyRef: + name: mongodb-whitelist + key: allowed-hosts + ports: + - containerPort: 27017 + hostPort: 27017 + name: public-mdb-port + protocol: TCP + - containerPort: 80 + hostPort: 80 + name: public-bdb-port + protocol: TCP + resources: + limits: + cpu: 200m + memory: 768Mi + #livenessProbe: TODO(Krish) + #readinessProbe: TODO(Krish) + restartPolicy: Always diff --git a/k8s/nginx/nginx-svc.yaml b/k8s/nginx/nginx-svc.yaml new file mode 100644 index 00000000..8b0cded4 --- /dev/null +++ b/k8s/nginx/nginx-svc.yaml @@ -0,0 +1,24 @@ +apiVersion: v1 +kind: Service +metadata: + name: ngx-instance-0 + namespace: default + labels: + name: ngx-instance-0 + annotations: + # NOTE: the following annotation is a beta feature and + # only available in GCE/GKE and Azure as of now + service.beta.kubernetes.io/external-traffic: OnlyLocal +spec: + selector: + app: ngx-instance-0-dep + ports: + - port: 27017 + targetPort: 27017 + name: ngx-public-mdb-port + protocol: TCP + - port: 80 + targetPort: 80 + name: ngx-public-bdb-port + protocol: TCP + type: LoadBalancer diff --git a/k8s/toolbox/Dockerfile b/k8s/toolbox/Dockerfile new file mode 100644 index 00000000..c9adfb5e --- /dev/null +++ b/k8s/toolbox/Dockerfile @@ -0,0 +1,15 @@ +# Toolbox container for debugging +# Run as: +# docker run -it --rm --entrypoint sh bigchaindb/toolbox +# kubectl run -it toolbox --image bigchaindb/toolbox --restart=Never --rm + +FROM alpine:3.5 +LABEL maintainer "dev@bigchaindb.com" +WORKDIR / +RUN apk add --no-cache --update curl bind-tools python3-dev g++ \ + libffi-dev make vim git \ + && pip3 install ipython \ + && git clone https://github.com/bigchaindb/bigchaindb-driver \ + && cd bigchaindb-driver \ + && pip3 install -e . 
+ENTRYPOINT ["/bin/sh"] diff --git a/k8s/toolbox/README.md b/k8s/toolbox/README.md new file mode 100644 index 00000000..7a31f464 --- /dev/null +++ b/k8s/toolbox/README.md @@ -0,0 +1,14 @@ +## Docker container with debugging tools + +* curl +* bind-utils - provides nslookup, dig +* python3 +* make + +## Build + +`docker build -t bigchaindb/toolbox .` + +## Push + +`docker push bigchaindb/toolbox` diff --git a/setup.py b/setup.py index 7e8c3441..45d6f04f 100644 --- a/setup.py +++ b/setup.py @@ -31,6 +31,7 @@ dev_require = [ 'ipdb', 'ipython', 'watchdog', + 'logging_tree', ] docs_require = [ @@ -50,8 +51,10 @@ tests_require = [ 'pytest>=3.0.0', 'pytest-catchlog>=1.2.2', 'pytest-cov>=2.2.1', + 'pytest-mock', 'pytest-xdist', 'pytest-flask', + 'pytest-aiohttp', 'tox', ] + docs_require @@ -65,7 +68,7 @@ install_requires = [ 'pymongo~=3.4', 'pysha3~=1.0.2', 'cryptoconditions>=0.5.0', - 'python-rapidjson>=0.0.8', + 'python-rapidjson==0.0.11', 'logstats>=0.2.1', 'flask>=0.10.1', 'flask-restful~=0.3.0', @@ -74,6 +77,7 @@ install_requires = [ 'multipipes~=0.1.0', 'jsonschema~=2.5.1', 'pyyaml~=3.12', + 'aiohttp~=2.0', ] setup( @@ -115,7 +119,7 @@ setup( entry_points={ 'console_scripts': [ - 'bigchaindb=bigchaindb.commands.bigchain:main' + 'bigchaindb=bigchaindb.commands.bigchaindb:main' ], }, install_requires=install_requires, diff --git a/speed-tests/README.md b/speed-tests/README.md deleted file mode 100644 index 7b07d338..00000000 --- a/speed-tests/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# Speed Tests - -This folder contains tests related to the code performance of a single node. \ No newline at end of file diff --git a/speed-tests/speed_tests.py b/speed-tests/speed_tests.py deleted file mode 100644 index 87a81b0f..00000000 --- a/speed-tests/speed_tests.py +++ /dev/null @@ -1,97 +0,0 @@ -import json -import time - -import rapidjson -from line_profiler import LineProfiler - -import bigchaindb - -# BIG TODO: Adjust for new transaction model - - -def speedtest_validate_transaction(): - # create a transaction - b = bigchaindb.Bigchain() - tx = b.create_transaction(b.me, b.me, None, 'CREATE') - tx_signed = b.sign_transaction(tx, b.me_private) - - # setup the profiler - profiler = LineProfiler() - profiler.enable_by_count() - profiler.add_function(bigchaindb.Bigchain.validate_transaction) - - # validate_transaction 1000 times - for i in range(1000): - b.validate_transaction(tx_signed) - - profiler.print_stats() - - -def speedtest_serialize_block_json(): - # create a block - b = bigchaindb.Bigchain() - tx = b.create_transaction(b.me, b.me, None, 'CREATE') - tx_signed = b.sign_transaction(tx, b.me_private) - block = b.create_block([tx_signed] * 1000) - - time_start = time.time() - for _ in range(1000): - _ = json.dumps(block, skipkeys=False, ensure_ascii=False, sort_keys=True) - time_elapsed = time.time() - time_start - - print('speedtest_serialize_block_json: {} s'.format(time_elapsed)) - - -def speedtest_serialize_block_rapidjson(): - # create a block - b = bigchaindb.Bigchain() - tx = b.create_transaction(b.me, b.me, None, 'CREATE') - tx_signed = b.sign_transaction(tx, b.me_private) - block = b.create_block([tx_signed] * 1000) - - time_start = time.time() - for _ in range(1000): - _ = rapidjson.dumps(block, skipkeys=False, ensure_ascii=False, sort_keys=True) - time_elapsed = time.time() - time_start - - print('speedtest_serialize_block_rapidjson: {} s'.format(time_elapsed)) - - -def speedtest_deserialize_block_json(): - # create a block - b = bigchaindb.Bigchain() - tx = b.create_transaction(b.me, b.me, 
None, 'CREATE') - tx_signed = b.sign_transaction(tx, b.me_private) - block = b.create_block([tx_signed] * 1000) - block_serialized = json.dumps(block, skipkeys=False, ensure_ascii=False, sort_keys=True) - - time_start = time.time() - for _ in range(1000): - _ = json.loads(block_serialized) - time_elapsed = time.time() - time_start - - print('speedtest_deserialize_block_json: {} s'.format(time_elapsed)) - - -def speedtest_deserialize_block_rapidjson(): - # create a block - b = bigchaindb.Bigchain() - tx = b.create_transaction(b.me, b.me, None, 'CREATE') - tx_signed = b.sign_transaction(tx, b.me_private) - block = b.create_block([tx_signed] * 1000) - block_serialized = rapidjson.dumps(block, skipkeys=False, ensure_ascii=False, sort_keys=True) - - time_start = time.time() - for _ in range(1000): - _ = rapidjson.loads(block_serialized) - time_elapsed = time.time() - time_start - - print('speedtest_deserialize_block_rapidjson: {} s'.format(time_elapsed)) - - -if __name__ == '__main__': - speedtest_validate_transaction() - speedtest_serialize_block_json() - speedtest_serialize_block_rapidjson() - speedtest_deserialize_block_json() - speedtest_deserialize_block_rapidjson() diff --git a/tests/README.md b/tests/README.md index ce4ac22c..d0e2da52 100644 --- a/tests/README.md +++ b/tests/README.md @@ -68,20 +68,6 @@ The `pytest` command has many options. If you want to learn about all the things You can also use [Docker Compose](https://docs.docker.com/compose/) to run all the tests. -#### With RethinkDB as the backend - -First, start `RethinkDB` in the background: - -```text -$ docker-compose up -d rdb -``` - -then run the tests using: - -```text -$ docker-compose run --rm bdb py.test -v -``` - #### With MongoDB as the backend First, start `MongoDB` in the background: @@ -93,7 +79,7 @@ $ docker-compose up -d mdb then run the tests using: ```text -$ docker-compose run --rm bdb-mdb py.test -v +$ docker-compose run --rm bdb py.test -v ``` If you've upgraded to a newer version of BigchainDB, you might have to rebuild @@ -103,8 +89,22 @@ the images before being able to run the tests. Run: $ docker-compose build ``` +#### With RethinkDB as the backend + +First, start `RethinkDB` in the background: + +```text +$ docker-compose up -d rdb +``` + +then run the tests using: + +```text +$ docker-compose run --rm bdb-rdb py.test -v +``` + to rebuild all the images (usually you only need to rebuild the `bdb` and - `bdb-mdb` images). + `bdb-rdb` images). 
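+
+Putting the MongoDB-backend steps together, a typical local test cycle
+looks like this (a sketch; the `mdb` and `bdb` service names are the ones
+defined in this repo's `docker-compose.yml`):
+
+```text
+$ docker-compose build                      # rebuild images after upgrading
+$ docker-compose up -d mdb                  # start MongoDB in the background
+$ docker-compose run --rm bdb py.test -v    # run the test suite against it
+$ docker-compose down                       # tear everything down when done
+```
+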
## Automated Testing of All Pull Requests diff --git a/tests/assets/test_digital_assets.py b/tests/assets/test_digital_assets.py index 1dc4764f..c31ec3da 100644 --- a/tests/assets/test_digital_assets.py +++ b/tests/assets/test_digital_assets.py @@ -1,3 +1,4 @@ +from bigchaindb.common.exceptions import ValidationError import pytest import random @@ -26,8 +27,8 @@ def test_validate_bad_asset_creation(b, user_pk): tx.asset['data'] = 'a' tx_signed = tx.sign([b.me_private]) - with pytest.raises(TypeError): - b.validate_transaction(tx_signed) + with pytest.raises(ValidationError): + Transaction.from_dict(tx_signed.to_dict()) @pytest.mark.bdb @@ -92,15 +93,15 @@ def test_asset_id_mismatch(b, user_pk): def test_create_invalid_divisible_asset(b, user_pk, user_sk): from bigchaindb.models import Transaction - from bigchaindb.common.exceptions import AmountError + from bigchaindb.common.exceptions import ValidationError # Asset amount must be more than 0 tx = Transaction.create([user_pk], [([user_pk], 1)]) tx.outputs[0].amount = 0 tx.sign([user_sk]) - with pytest.raises(AmountError): - b.validate_transaction(tx) + with pytest.raises(ValidationError): + Transaction.from_dict(tx.to_dict()) def test_create_valid_divisible_asset(b, user_pk, user_sk): @@ -108,4 +109,4 @@ def test_create_valid_divisible_asset(b, user_pk, user_sk): tx = Transaction.create([user_pk], [([user_pk], 2)]) tx_signed = tx.sign([user_sk]) - assert b.is_valid_transaction(tx_signed) + tx_signed.validate(b) diff --git a/tests/assets/test_divisible_assets.py b/tests/assets/test_divisible_assets.py index 31e7890f..87a29c2b 100644 --- a/tests/assets/test_divisible_assets.py +++ b/tests/assets/test_divisible_assets.py @@ -638,6 +638,7 @@ def test_divide(b, user_pk, user_sk): # Check that negative inputs are caught when creating a TRANSFER transaction +@pytest.mark.skip(reason='part of tx structural tests') @pytest.mark.bdb @pytest.mark.usefixtures('inputs') def test_non_positive_amounts_on_transfer(b, user_pk): @@ -662,6 +663,7 @@ def test_non_positive_amounts_on_transfer(b, user_pk): # Check that negative inputs are caught when validating a TRANSFER transaction +@pytest.mark.skip(reason='part of tx structural tests') @pytest.mark.bdb @pytest.mark.usefixtures('inputs') def test_non_positive_amounts_on_transfer_validate(b, user_pk, user_sk): @@ -704,6 +706,7 @@ def test_non_positive_amounts_on_create(b, user_pk): # Check that negative inputs are caught when validating a CREATE transaction +@pytest.mark.skip(reason='part of tx structural tests') @pytest.mark.bdb @pytest.mark.usefixtures('inputs') def test_non_positive_amounts_on_create_validate(b, user_pk): diff --git a/tests/backend/mongodb/test_admin.py b/tests/backend/mongodb/test_admin.py index 148c853a..075ea2f9 100644 --- a/tests/backend/mongodb/test_admin.py +++ b/tests/backend/mongodb/test_admin.py @@ -40,7 +40,7 @@ def connection(): # executed to make sure that the replica set is correctly initialized. # Here we force the the connection setup so that all required # `Database.command` are executed before we mock them it in the tests. 
- connection._connect() + connection.connect() return connection diff --git a/tests/backend/mongodb/test_connection.py b/tests/backend/mongodb/test_connection.py index 786b7d7b..3edc31b1 100644 --- a/tests/backend/mongodb/test_connection.py +++ b/tests/backend/mongodb/test_connection.py @@ -32,15 +32,15 @@ def mongodb_connection(): port=bigchaindb.config['database']['port']) -def test_get_connection_returns_the_correct_instance(): +def test_get_connection_returns_the_correct_instance(db_host, db_port): from bigchaindb.backend import connect from bigchaindb.backend.connection import Connection from bigchaindb.backend.mongodb.connection import MongoDBConnection config = { 'backend': 'mongodb', - 'host': 'localhost', - 'port': 27017, + 'host': db_host, + 'port': db_port, 'name': 'test', 'replicaset': 'bigchain-rs' } @@ -99,6 +99,18 @@ def test_connection_run_errors(mock_client, mock_init_repl_set): assert query.run.call_count == 1 +@mock.patch('pymongo.database.Database.authenticate') +def test_connection_with_credentials(mock_authenticate): + import bigchaindb + from bigchaindb.backend.mongodb.connection import MongoDBConnection + conn = MongoDBConnection(host=bigchaindb.config['database']['host'], + port=bigchaindb.config['database']['port'], + login='theplague', + password='secret') + conn.connect() + assert mock_authenticate.call_count == 2 + + def test_check_replica_set_not_enabled(mongodb_connection): from bigchaindb.backend.mongodb.connection import _check_replica_set from bigchaindb.common.exceptions import ConfigurationError @@ -168,7 +180,7 @@ def test_initialize_replica_set(mock_cmd_line_opts): ] # check that it returns - assert initialize_replica_set('host', 1337, 1000) is None + assert initialize_replica_set('host', 1337, 1000, 'dbname', False, None, None) is None # test it raises OperationError if anything wrong with mock.patch.object(Database, 'command') as mock_command: @@ -178,4 +190,4 @@ def test_initialize_replica_set(mock_cmd_line_opts): ] with pytest.raises(pymongo.errors.OperationFailure): - initialize_replica_set('host', 1337, 1000) + initialize_replica_set('host', 1337, 1000, 'dbname', False, None, None) diff --git a/tests/backend/mongodb/test_queries.py b/tests/backend/mongodb/test_queries.py index 80e3cc91..bd7e75f1 100644 --- a/tests/backend/mongodb/test_queries.py +++ b/tests/backend/mongodb/test_queries.py @@ -159,6 +159,43 @@ def test_get_spent(signed_create_tx, signed_transfer_tx): assert spents[0] == signed_transfer_tx.to_dict() +def test_get_spent_for_tx_with_multiple_inputs(carol): + from bigchaindb.backend import connect, query + from bigchaindb.models import Block, Transaction + conn = connect() + tx_0 = Transaction.create( + [carol.public_key], + [([carol.public_key], 1), + ([carol.public_key], 1), + ([carol.public_key], 2)], + ).sign([carol.private_key]) + block = Block(transactions=[tx_0]) + conn.db.bigchain.insert_one(block.to_dict()) + spents = list(query.get_spent(conn, tx_0.id, 0)) + assert not spents + + tx_1 = Transaction.transfer( + tx_0.to_inputs()[2:3], + [([carol.public_key], 1), + ([carol.public_key], 1)], + asset_id=tx_0.id, + ).sign([carol.private_key]) + block = Block(transactions=[tx_1]) + conn.db.bigchain.insert_one(block.to_dict()) + spents = list(query.get_spent(conn, tx_0.id, 0)) + assert not spents + + tx_2 = Transaction.transfer( + tx_0.to_inputs()[0:1] + tx_1.to_inputs()[1:2], + [([carol.public_key], 2)], + asset_id=tx_0.id, + ).sign([carol.private_key]) + block = Block(transactions=[tx_2]) + 
conn.db.bigchain.insert_one(block.to_dict()) + spents = list(query.get_spent(conn, tx_0.id, 1)) + assert not spents + + def test_get_owned_ids(signed_create_tx, user_pk): from bigchaindb.backend import connect, query from bigchaindb.models import Block @@ -175,6 +212,7 @@ def test_get_owned_ids(signed_create_tx, user_pk): def test_get_votes_by_block_id(signed_create_tx, structurally_valid_vote): + from bigchaindb.common.crypto import generate_key_pair from bigchaindb.backend import connect, query from bigchaindb.models import Block conn = connect() @@ -182,10 +220,14 @@ def test_get_votes_by_block_id(signed_create_tx, structurally_valid_vote): # create and insert a block block = Block(transactions=[signed_create_tx]) conn.db.bigchain.insert_one(block.to_dict()) + # create and insert some votes structurally_valid_vote['vote']['voting_for_block'] = block.id conn.db.votes.insert_one(structurally_valid_vote) + # create a second vote under a different key + _, pk = generate_key_pair() structurally_valid_vote['vote']['voting_for_block'] = block.id + structurally_valid_vote['node_pubkey'] = pk structurally_valid_vote.pop('_id') conn.db.votes.insert_one(structurally_valid_vote) @@ -288,6 +330,19 @@ def test_write_vote(structurally_valid_vote): assert vote_db == structurally_valid_vote +def test_duplicate_vote_raises_duplicate_key(structurally_valid_vote): + from bigchaindb.backend import connect, query + from bigchaindb.backend.exceptions import DuplicateKeyError + conn = connect() + + # write a vote + query.write_vote(conn, structurally_valid_vote) + + # write the same vote a second time + with pytest.raises(DuplicateKeyError): + query.write_vote(conn, structurally_valid_vote) + + def test_get_genesis_block(genesis_block): from bigchaindb.backend import connect, query conn = connect() diff --git a/tests/backend/rethinkdb/test_connection.py b/tests/backend/rethinkdb/test_connection.py index 073fecee..df393716 100644 --- a/tests/backend/rethinkdb/test_connection.py +++ b/tests/backend/rethinkdb/test_connection.py @@ -34,6 +34,7 @@ def test_run_a_simple_query(): def test_raise_exception_when_max_tries(): from bigchaindb.backend import connect + from bigchaindb.backend.exceptions import OperationError class MockQuery: def run(self, conn): @@ -41,28 +42,41 @@ def test_raise_exception_when_max_tries(): conn = connect() - with pytest.raises(r.ReqlDriverError): + with pytest.raises(OperationError): conn.run(MockQuery()) -def test_reconnect_when_connection_lost(): +def test_reconnect_when_connection_lost(db_host, db_port): from bigchaindb.backend import connect - def raise_exception(*args, **kwargs): - raise r.ReqlDriverError('mock') - - conn = connect() original_connect = r.connect - r.connect = raise_exception - def delayed_start(): - time.sleep(1) - r.connect = original_connect + with patch('rethinkdb.connect') as mock_connect: + mock_connect.side_effect = [ + r.ReqlDriverError('mock'), + original_connect(host=db_host, port=db_port) + ] - thread = Thread(target=delayed_start) - query = r.expr('1') - thread.start() - assert conn.run(query) == '1' + conn = connect() + query = r.expr('1') + assert conn.run(query) == '1' + + +def test_reconnect_when_connection_lost_tries_n_times(): + from bigchaindb.backend import connect + from bigchaindb.backend.exceptions import ConnectionError + + with patch('rethinkdb.connect') as mock_connect: + mock_connect.side_effect = [ + r.ReqlDriverError('mock'), + r.ReqlDriverError('mock'), + r.ReqlDriverError('mock') + ] + + conn = connect(max_tries=3) + query = r.expr('1') + 
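# The two reconnect tests above pin down a retry contract: the connection
# keeps calling rethinkdb.connect() until it succeeds or max_tries attempts
# are exhausted, after which a backend-agnostic ConnectionError surfaces.
# A minimal sketch of that loop (hypothetical names; the real
# implementation differs in detail):
class MockConnectionError(Exception):
    """Stand-in for bigchaindb.backend.exceptions.ConnectionError."""

def connect_with_retries(attempt, max_tries=3):
    # `attempt` is a zero-argument callable that raises on failure.
    last_exc = None
    for _ in range(max_tries):
        try:
            return attempt()
        except Exception as exc:
            last_exc = exc
    raise MockConnectionError() from last_exc
# With max_tries=3 and three failing side effects, the loop exhausts and
# raises, which is exactly what the test below asserts.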
with pytest.raises(ConnectionError): + assert conn.run(query) == '1' def test_changefeed_reconnects_when_connection_lost(monkeypatch): diff --git a/tests/commands/conftest.py b/tests/commands/conftest.py index 1cffbc2f..4a60c0cc 100644 --- a/tests/commands/conftest.py +++ b/tests/commands/conftest.py @@ -1,10 +1,12 @@ +from argparse import Namespace + import pytest @pytest.fixture def mock_run_configure(monkeypatch): - from bigchaindb.commands import bigchain - monkeypatch.setattr(bigchain, 'run_configure', lambda *args, **kwargs: None) + from bigchaindb.commands import bigchaindb + monkeypatch.setattr(bigchaindb, 'run_configure', lambda *args, **kwargs: None) @pytest.fixture @@ -15,8 +17,8 @@ def mock_write_config(monkeypatch): @pytest.fixture def mock_db_init_with_existing_db(monkeypatch): - from bigchaindb.commands import bigchain - monkeypatch.setattr(bigchain, '_run_init', lambda: None) + from bigchaindb.commands import bigchaindb + monkeypatch.setattr(bigchaindb, '_run_init', lambda: None) @pytest.fixture @@ -38,3 +40,22 @@ def mock_bigchaindb_backup_config(monkeypatch): 'backlog_reassign_delay': 5 } monkeypatch.setattr('bigchaindb._config', config) + + +@pytest.fixture +def run_start_args(request): + param = getattr(request, 'param', {}) + return Namespace( + config=param.get('config'), + start_rethinkdb=param.get('start_rethinkdb', False), + allow_temp_keypair=param.get('allow_temp_keypair', False), + ) + + +@pytest.fixture +def mocked_setup_logging(mocker): + return mocker.patch( + 'bigchaindb.commands.utils.setup_logging', + autospec=True, + spec_set=True, + ) diff --git a/tests/commands/rethinkdb/test_commands.py b/tests/commands/rethinkdb/test_commands.py index 5fb75f4d..e40b3ff2 100644 --- a/tests/commands/rethinkdb/test_commands.py +++ b/tests/commands/rethinkdb/test_commands.py @@ -9,12 +9,15 @@ from argparse import Namespace def test_bigchain_run_start_with_rethinkdb(mock_start_rethinkdb, mock_run_configure, mock_processes_start, - mock_db_init_with_existing_db): - from bigchaindb.commands.bigchain import run_start + mock_db_init_with_existing_db, + mocked_setup_logging): + from bigchaindb import config + from bigchaindb.commands.bigchaindb import run_start args = Namespace(start_rethinkdb=True, allow_temp_keypair=False, config=None, yes=True) run_start(args) mock_start_rethinkdb.assert_called_with() + mocked_setup_logging.assert_called_once_with(user_log_config=config['log']) @patch('subprocess.Popen') @@ -37,7 +40,7 @@ def test_start_rethinkdb_exits_when_cannot_start(mock_popen): @patch('rethinkdb.ast.Table.reconfigure') def test_set_shards(mock_reconfigure, monkeypatch, b): - from bigchaindb.commands.bigchain import run_set_shards + from bigchaindb.commands.bigchaindb import run_set_shards # this will mock the call to retrieve the database config # we will set it to return one replica @@ -45,7 +48,7 @@ def test_set_shards(mock_reconfigure, monkeypatch, b): return {'shards': [{'replicas': [1]}]} monkeypatch.setattr(rethinkdb.RqlQuery, 'run', mockreturn_one_replica) - args = Namespace(num_shards=3) + args = Namespace(num_shards=3, config=None) run_set_shards(args) mock_reconfigure.assert_called_with(replicas=1, shards=3, dry_run=False) @@ -59,9 +62,8 @@ def test_set_shards(mock_reconfigure, monkeypatch, b): mock_reconfigure.assert_called_with(replicas=3, shards=3, dry_run=False) -@patch('logging.Logger.warn') -def test_set_shards_raises_exception(mock_log, monkeypatch, b): - from bigchaindb.commands.bigchain import run_set_shards +def 
test_set_shards_raises_exception(monkeypatch, b): + from bigchaindb.commands.bigchaindb import run_set_shards # test that we are correctly catching the exception def mock_raise(*args, **kwargs): @@ -73,15 +75,15 @@ def test_set_shards_raises_exception(mock_log, monkeypatch, b): monkeypatch.setattr(rethinkdb.RqlQuery, 'run', mockreturn_one_replica) monkeypatch.setattr(rethinkdb.ast.Table, 'reconfigure', mock_raise) - args = Namespace(num_shards=3) - run_set_shards(args) - - assert mock_log.called + args = Namespace(num_shards=3, config=None) + with pytest.raises(SystemExit) as exc: + run_set_shards(args) + assert exc.value.args == ('Failed to reconfigure tables.',) @patch('rethinkdb.ast.Table.reconfigure') def test_set_replicas(mock_reconfigure, monkeypatch, b): - from bigchaindb.commands.bigchain import run_set_replicas + from bigchaindb.commands.bigchaindb import run_set_replicas # this will mock the call to retrieve the database config # we will set it to return two shards @@ -89,7 +91,7 @@ def test_set_replicas(mock_reconfigure, monkeypatch, b): return {'shards': [1, 2]} monkeypatch.setattr(rethinkdb.RqlQuery, 'run', mockreturn_two_shards) - args = Namespace(num_replicas=2) + args = Namespace(num_replicas=2, config=None) run_set_replicas(args) mock_reconfigure.assert_called_with(replicas=2, shards=2, dry_run=False) @@ -103,9 +105,8 @@ def test_set_replicas(mock_reconfigure, monkeypatch, b): mock_reconfigure.assert_called_with(replicas=2, shards=3, dry_run=False) -@patch('logging.Logger.warn') -def test_set_replicas_raises_exception(mock_log, monkeypatch, b): - from bigchaindb.commands.bigchain import run_set_replicas +def test_set_replicas_raises_exception(monkeypatch, b): + from bigchaindb.commands.bigchaindb import run_set_replicas # test that we are correctly catching the exception def mock_raise(*args, **kwargs): @@ -117,7 +118,7 @@ def test_set_replicas_raises_exception(mock_log, monkeypatch, b): monkeypatch.setattr(rethinkdb.RqlQuery, 'run', mockreturn_two_shards) monkeypatch.setattr(rethinkdb.ast.Table, 'reconfigure', mock_raise) - args = Namespace(num_replicas=2) - run_set_replicas(args) - - assert mock_log.called + args = Namespace(num_replicas=2, config=None) + with pytest.raises(SystemExit) as exc: + run_set_replicas(args) + assert exc.value.args == ('Failed to reconfigure tables.',) diff --git a/tests/commands/test_commands.py b/tests/commands/test_commands.py index f806eb7c..37079ddd 100644 --- a/tests/commands/test_commands.py +++ b/tests/commands/test_commands.py @@ -1,6 +1,6 @@ import json from unittest.mock import Mock, patch -from argparse import Namespace, ArgumentTypeError +from argparse import Namespace import copy import pytest @@ -8,7 +8,7 @@ import pytest def test_make_sure_we_dont_remove_any_command(): # thanks to: http://stackoverflow.com/a/18161115/597097 - from bigchaindb.commands.bigchain import create_parser + from bigchaindb.commands.bigchaindb import create_parser parser = create_parser() @@ -21,66 +21,35 @@ def test_make_sure_we_dont_remove_any_command(): assert parser.parse_args(['start']).command assert parser.parse_args(['set-shards', '1']).command assert parser.parse_args(['set-replicas', '1']).command - assert parser.parse_args(['load']).command assert parser.parse_args(['add-replicas', 'localhost:27017']).command assert parser.parse_args(['remove-replicas', 'localhost:27017']).command -def test_start_raises_if_command_not_implemented(): - from bigchaindb.commands.bigchain import utils - from bigchaindb.commands.bigchain import create_parser - - 
parser = create_parser() - - with pytest.raises(NotImplementedError): - # Will raise because `scope`, the third parameter, - # doesn't contain the function `run_start` - utils.start(parser, ['start'], {}) - - -def test_start_raises_if_no_arguments_given(): - from bigchaindb.commands.bigchain import utils - from bigchaindb.commands.bigchain import create_parser - - parser = create_parser() - - with pytest.raises(SystemExit): - utils.start(parser, [], {}) - - -@patch('multiprocessing.cpu_count', return_value=42) -def test_start_sets_multiprocess_var_based_on_cli_args(mock_cpu_count): - from bigchaindb.commands.bigchain import utils - from bigchaindb.commands.bigchain import create_parser - - def run_load(args): - return args - - parser = create_parser() - - assert utils.start(parser, ['load'], {'run_load': run_load}).multiprocess == 1 - assert utils.start(parser, ['load', '--multiprocess'], {'run_load': run_load}).multiprocess == 42 - - @patch('bigchaindb.commands.utils.start') def test_main_entrypoint(mock_start): - from bigchaindb.commands.bigchain import main + from bigchaindb.commands.bigchaindb import main main() assert mock_start.called -def test_bigchain_run_start(mock_run_configure, mock_processes_start, mock_db_init_with_existing_db): - from bigchaindb.commands.bigchain import run_start +def test_bigchain_run_start(mock_run_configure, + mock_processes_start, + mock_db_init_with_existing_db, + mocked_setup_logging): + from bigchaindb import config + from bigchaindb.commands.bigchaindb import run_start args = Namespace(start_rethinkdb=False, allow_temp_keypair=False, config=None, yes=True) run_start(args) + mocked_setup_logging.assert_called_once_with(user_log_config=config['log']) @pytest.mark.skipif(reason="BigchainDB doesn't support the automatic creation of a config file anymore") -def test_bigchain_run_start_assume_yes_create_default_config(monkeypatch, mock_processes_start, - mock_generate_key_pair, mock_db_init_with_existing_db): +def test_bigchain_run_start_assume_yes_create_default_config( + monkeypatch, mock_processes_start, mock_generate_key_pair, + mock_db_init_with_existing_db, mocked_setup_logging): import bigchaindb - from bigchaindb.commands.bigchain import run_start + from bigchaindb.commands.bigchaindb import run_start from bigchaindb import config_utils value = {} @@ -98,6 +67,7 @@ def test_bigchain_run_start_assume_yes_create_default_config(monkeypatch, mock_p args = Namespace(config=None, yes=True) run_start(args) + mocked_setup_logging.assert_called_once_with() assert value['return'] == expected_config @@ -107,7 +77,7 @@ def test_bigchain_run_start_assume_yes_create_default_config(monkeypatch, mock_p @pytest.mark.usefixtures('ignore_local_config_file') def test_bigchain_show_config(capsys): from bigchaindb import config - from bigchaindb.commands.bigchain import run_show_config + from bigchaindb.commands.bigchaindb import run_show_config args = Namespace(config=None) _, _ = capsys.readouterr() @@ -120,7 +90,7 @@ def test_bigchain_show_config(capsys): def test_bigchain_export_my_pubkey_when_pubkey_set(capsys, monkeypatch): from bigchaindb import config - from bigchaindb.commands.bigchain import run_export_my_pubkey + from bigchaindb.commands.bigchaindb import run_export_my_pubkey args = Namespace(config='dummy') # so in run_export_my_pubkey(args) below, @@ -131,14 +101,15 @@ def test_bigchain_export_my_pubkey_when_pubkey_set(capsys, monkeypatch): monkeypatch.setitem(config['keypair'], 'public', 'Charlie_Bucket') _, _ = capsys.readouterr() # has the effect of 
clearing capsys run_export_my_pubkey(args) - out, err = capsys.readouterr() - assert out == config['keypair']['public'] + '\n' - assert out == 'Charlie_Bucket\n' + out, _ = capsys.readouterr() + lines = out.splitlines() + assert config['keypair']['public'] in lines + assert 'Charlie_Bucket' in lines def test_bigchain_export_my_pubkey_when_pubkey_not_set(monkeypatch): from bigchaindb import config - from bigchaindb.commands.bigchain import run_export_my_pubkey + from bigchaindb.commands.bigchaindb import run_export_my_pubkey args = Namespace(config='dummy') monkeypatch.setitem(config['keypair'], 'public', None) @@ -154,15 +125,44 @@ def test_bigchain_export_my_pubkey_when_pubkey_not_set(monkeypatch): "This node's public key wasn't set anywhere so it can't be exported" -def test_bigchain_run_init_when_db_exists(mock_db_init_with_existing_db): - from bigchaindb.commands.bigchain import run_init +def test_bigchain_run_init_when_db_exists(mocker, capsys): + from bigchaindb.commands.bigchaindb import run_init + from bigchaindb.common.exceptions import DatabaseAlreadyExists + init_db_mock = mocker.patch( + 'bigchaindb.commands.bigchaindb.schema.init_database', + autospec=True, + spec_set=True, + ) + init_db_mock.side_effect = DatabaseAlreadyExists args = Namespace(config=None) run_init(args) + output_message = capsys.readouterr()[1] + print(output_message) + assert output_message == ( + 'The database already exists.\n' + 'If you wish to re-initialize it, first drop it.\n' + ) + + +def test__run_init(mocker): + from bigchaindb.commands.bigchaindb import _run_init + bigchain_mock = mocker.patch( + 'bigchaindb.commands.bigchaindb.bigchaindb.Bigchain') + init_db_mock = mocker.patch( + 'bigchaindb.commands.bigchaindb.schema.init_database', + autospec=True, + spec_set=True, + ) + _run_init() + bigchain_mock.assert_called_once_with() + init_db_mock.assert_called_once_with( + connection=bigchain_mock.return_value.connection) + bigchain_mock.return_value.create_genesis_block.assert_called_once_with() @patch('bigchaindb.backend.schema.drop_database') def test_drop_db_when_assumed_yes(mock_db_drop): - from bigchaindb.commands.bigchain import run_drop + from bigchaindb.commands.bigchaindb import run_drop args = Namespace(config=None, yes=True) run_drop(args) @@ -171,26 +171,40 @@ def test_drop_db_when_assumed_yes(mock_db_drop): @patch('bigchaindb.backend.schema.drop_database') def test_drop_db_when_interactive_yes(mock_db_drop, monkeypatch): - from bigchaindb.commands.bigchain import run_drop + from bigchaindb.commands.bigchaindb import run_drop args = Namespace(config=None, yes=False) - monkeypatch.setattr('bigchaindb.commands.bigchain.input_on_stderr', lambda x: 'y') + monkeypatch.setattr('bigchaindb.commands.bigchaindb.input_on_stderr', lambda x: 'y') run_drop(args) assert mock_db_drop.called +@patch('bigchaindb.backend.schema.drop_database') +def test_drop_db_when_db_does_not_exist(mock_db_drop, capsys): + from bigchaindb import config + from bigchaindb.commands.bigchaindb import run_drop + from bigchaindb.common.exceptions import DatabaseDoesNotExist + args = Namespace(config=None, yes=True) + mock_db_drop.side_effect = DatabaseDoesNotExist + + run_drop(args) + output_message = capsys.readouterr()[1] + assert output_message == "Cannot drop '{name}'. 
The database does not exist.\n".format( + name=config['database']['name']) + + @patch('bigchaindb.backend.schema.drop_database') def test_drop_db_does_not_drop_when_interactive_no(mock_db_drop, monkeypatch): - from bigchaindb.commands.bigchain import run_drop + from bigchaindb.commands.bigchaindb import run_drop args = Namespace(config=None, yes=False) - monkeypatch.setattr('bigchaindb.commands.bigchain.input_on_stderr', lambda x: 'n') + monkeypatch.setattr('bigchaindb.commands.bigchaindb.input_on_stderr', lambda x: 'n') run_drop(args) assert not mock_db_drop.called def test_run_configure_when_config_exists_and_skipping(monkeypatch): - from bigchaindb.commands.bigchain import run_configure + from bigchaindb.commands.bigchaindb import run_configure monkeypatch.setattr('os.path.exists', lambda path: True) args = Namespace(config='foo', yes=True) return_value = run_configure(args, skip_if_exists=True) @@ -204,7 +218,7 @@ def test_run_configure_when_config_does_not_exist(monkeypatch, mock_write_config, mock_generate_key_pair, mock_bigchaindb_backup_config): - from bigchaindb.commands.bigchain import run_configure + from bigchaindb.commands.bigchaindb import run_configure monkeypatch.setattr('os.path.exists', lambda path: False) monkeypatch.setattr('builtins.input', lambda: '\n') args = Namespace(config='foo', backend='rethinkdb', yes=True) @@ -221,7 +235,7 @@ def test_run_configure_when_config_does_exist(monkeypatch, def mock_write_config(newconfig, filename=None): value['return'] = newconfig - from bigchaindb.commands.bigchain import run_configure + from bigchaindb.commands.bigchaindb import run_configure monkeypatch.setattr('os.path.exists', lambda path: True) monkeypatch.setattr('builtins.input', lambda: '\n') monkeypatch.setattr('bigchaindb.config_utils.write_config', mock_write_config) @@ -237,7 +251,7 @@ def test_run_configure_when_config_does_exist(monkeypatch, )) def test_run_configure_with_backend(backend, monkeypatch, mock_write_config): import bigchaindb - from bigchaindb.commands.bigchain import run_configure + from bigchaindb.commands.bigchaindb import run_configure value = {} @@ -264,17 +278,19 @@ def test_run_configure_with_backend(backend, monkeypatch, mock_write_config): @patch('bigchaindb.common.crypto.generate_key_pair', return_value=('private_key', 'public_key')) @pytest.mark.usefixtures('ignore_local_config_file') -def test_allow_temp_keypair_generates_one_on_the_fly(mock_gen_keypair, - mock_processes_start, - mock_db_init_with_existing_db): +def test_allow_temp_keypair_generates_one_on_the_fly( + mock_gen_keypair, mock_processes_start, + mock_db_init_with_existing_db, mocked_setup_logging): import bigchaindb - from bigchaindb.commands.bigchain import run_start + from bigchaindb.commands.bigchaindb import run_start bigchaindb.config['keypair'] = {'private': None, 'public': None} args = Namespace(allow_temp_keypair=True, start_rethinkdb=False, config=None, yes=True) run_start(args) + mocked_setup_logging.assert_called_once_with( + user_log_config=bigchaindb.config['log']) assert bigchaindb.config['keypair']['private'] == 'private_key' assert bigchaindb.config['keypair']['public'] == 'public_key' @@ -284,9 +300,10 @@ def test_allow_temp_keypair_generates_one_on_the_fly(mock_gen_keypair, @pytest.mark.usefixtures('ignore_local_config_file') def test_allow_temp_keypair_doesnt_override_if_keypair_found(mock_gen_keypair, mock_processes_start, - mock_db_init_with_existing_db): + mock_db_init_with_existing_db, + mocked_setup_logging): import bigchaindb - from 
bigchaindb.commands.bigchain import run_start + from bigchaindb.commands.bigchaindb import run_start # Preconditions for the test original_private_key = bigchaindb.config['keypair']['private'] @@ -298,16 +315,89 @@ def test_allow_temp_keypair_doesnt_override_if_keypair_found(mock_gen_keypair, args = Namespace(allow_temp_keypair=True, start_rethinkdb=False, config=None, yes=True) run_start(args) + mocked_setup_logging.assert_called_once_with( + user_log_config=bigchaindb.config['log']) assert bigchaindb.config['keypair']['private'] == original_private_key assert bigchaindb.config['keypair']['public'] == original_public_key +def test_run_start_when_db_already_exists(mocker, + monkeypatch, + run_start_args, + mocked_setup_logging): + from bigchaindb import config + from bigchaindb.commands.bigchaindb import run_start + from bigchaindb.common.exceptions import DatabaseAlreadyExists + mocked_start = mocker.patch('bigchaindb.processes.start') + + def mock_run_init(): + raise DatabaseAlreadyExists() + + monkeypatch.setattr( + 'bigchaindb.commands.bigchaindb._run_init', mock_run_init) + run_start(run_start_args) + mocked_setup_logging.assert_called_once_with(user_log_config=config['log']) + assert mocked_start.called + + +def test_run_start_when_keypair_not_found(mocker, + monkeypatch, + run_start_args, + mocked_setup_logging): + from bigchaindb import config + from bigchaindb.commands.bigchaindb import run_start + from bigchaindb.commands.messages import CANNOT_START_KEYPAIR_NOT_FOUND + from bigchaindb.common.exceptions import KeypairNotFoundException + mocked_start = mocker.patch('bigchaindb.processes.start') + + def mock_run_init(): + raise KeypairNotFoundException() + + monkeypatch.setattr( + 'bigchaindb.commands.bigchaindb._run_init', mock_run_init) + + with pytest.raises(SystemExit) as exc: + run_start(run_start_args) + + mocked_setup_logging.assert_called_once_with(user_log_config=config['log']) + assert len(exc.value.args) == 1 + assert exc.value.args[0] == CANNOT_START_KEYPAIR_NOT_FOUND + assert not mocked_start.called + + +def test_run_start_when_start_rethinkdb_fails(mocker, + monkeypatch, + run_start_args, + mocked_setup_logging): + from bigchaindb import config + from bigchaindb.commands.bigchaindb import run_start + from bigchaindb.commands.messages import RETHINKDB_STARTUP_ERROR + from bigchaindb.common.exceptions import StartupError + run_start_args.start_rethinkdb = True + mocked_start = mocker.patch('bigchaindb.processes.start') + err_msg = 'Error starting rethinkdb.' 
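# The added run_start tests encode a CLI convention: low-level failures
# (StartupError, KeypairNotFoundException) become a SystemExit carrying a
# user-facing message, so tests can assert on exc.value.args. A minimal
# sketch of the convention (the constant and helper below are illustrative
# stand-ins, not the real bigchaindb.commands.messages API):
import sys

STARTUP_ERROR_TEMPLATE = 'RethinkDB failed to start: {}'  # stand-in constant

def start_or_exit(start_func):
    try:
        start_func()
    except RuntimeError as exc:  # stands in for StartupError
        # sys.exit(msg) raises SystemExit(msg), so exc.value.args == (msg,)
        sys.exit(STARTUP_ERROR_TEMPLATE.format(exc))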
+ + def mock_start_rethinkdb(): + raise StartupError(err_msg) + + monkeypatch.setattr( + 'bigchaindb.commands.utils.start_rethinkdb', mock_start_rethinkdb) + + with pytest.raises(SystemExit) as exc: + run_start(run_start_args) + + mocked_setup_logging.assert_called_once_with(user_log_config=config['log']) + assert len(exc.value.args) == 1 + assert exc.value.args[0] == RETHINKDB_STARTUP_ERROR.format(err_msg) + assert not mocked_start.called + + @patch('argparse.ArgumentParser.parse_args') @patch('bigchaindb.commands.utils.base_parser') @patch('bigchaindb.commands.utils.start') def test_calling_main(start_mock, base_parser_mock, parse_args_mock, monkeypatch): - from bigchaindb.commands.bigchain import main + from bigchaindb.commands.bigchaindb import main argparser_mock = Mock() parser = Mock() @@ -320,11 +410,6 @@ def test_calling_main(start_mock, base_parser_mock, parse_args_mock, main() assert argparser_mock.called is True - assert parser.add_argument.called is True - parser.add_argument.assert_any_call('--dev-start-rethinkdb', - dest='start_rethinkdb', - action='store_true', - help='Run RethinkDB on start') parser.add_subparsers.assert_called_with(title='Commands', dest='command') subparsers.add_parser.assert_any_call('configure', @@ -338,11 +423,19 @@ def test_calling_main(start_mock, base_parser_mock, parse_args_mock, 'key') subparsers.add_parser.assert_any_call('init', help='Init the database') subparsers.add_parser.assert_any_call('drop', help='Drop the database') + subparsers.add_parser.assert_any_call('start', help='Start BigchainDB') + subsubparsers.add_argument.assert_any_call('--dev-start-rethinkdb', + dest='start_rethinkdb', + action='store_true', + help='Run RethinkDB on start') + subsubparsers.add_argument.assert_any_call('--dev-allow-temp-keypair', + dest='allow_temp_keypair', + action='store_true', + help='Generate a random keypair on start') subparsers.add_parser.assert_any_call('set-shards', help='Configure number of shards') - subsubparsers.add_argument.assert_any_call('num_shards', metavar='num_shards', type=int, default=1, @@ -356,34 +449,13 @@ def test_calling_main(start_mock, base_parser_mock, parse_args_mock, help='Number of replicas (i.e. ' 'the replication factor)') - subparsers.add_parser.assert_any_call('load', - help='Write transactions to the ' - 'backlog') - - subsubparsers.add_argument.assert_any_call('-m', '--multiprocess', - nargs='?', type=int, - default=False, - help='Spawn multiple processes ' - 'to run the command, if no ' - 'value is provided, the number ' - 'of processes is equal to the ' - 'number of cores of the host ' - 'machine') - subsubparsers.add_argument.assert_any_call('-c', '--count', - default=0, - type=int, - help='Number of transactions ' - 'to push. 
If the parameter -m ' - 'is set, the count is ' - 'distributed equally to all ' - 'the processes') assert start_mock.called is True @pytest.mark.usefixtures('ignore_local_config_file') -@patch('bigchaindb.commands.bigchain.add_replicas') +@patch('bigchaindb.commands.bigchaindb.add_replicas') def test_run_add_replicas(mock_add_replicas): - from bigchaindb.commands.bigchain import run_add_replicas + from bigchaindb.commands.bigchaindb import run_add_replicas from bigchaindb.backend.exceptions import OperationError args = Namespace(config=None, replicas=['localhost:27017']) @@ -395,22 +467,26 @@ def test_run_add_replicas(mock_add_replicas): mock_add_replicas.reset_mock() # test add_replicas with `OperationError` - mock_add_replicas.side_effect = OperationError() - assert run_add_replicas(args) is None + mock_add_replicas.side_effect = OperationError('err') + with pytest.raises(SystemExit) as exc: + run_add_replicas(args) + assert exc.value.args == ('err',) assert mock_add_replicas.call_count == 1 mock_add_replicas.reset_mock() # test add_replicas with `NotImplementedError` - mock_add_replicas.side_effect = NotImplementedError() - assert run_add_replicas(args) is None + mock_add_replicas.side_effect = NotImplementedError('err') + with pytest.raises(SystemExit) as exc: + run_add_replicas(args) + assert exc.value.args == ('err',) assert mock_add_replicas.call_count == 1 mock_add_replicas.reset_mock() @pytest.mark.usefixtures('ignore_local_config_file') -@patch('bigchaindb.commands.bigchain.remove_replicas') +@patch('bigchaindb.commands.bigchaindb.remove_replicas') def test_run_remove_replicas(mock_remove_replicas): - from bigchaindb.commands.bigchain import run_remove_replicas + from bigchaindb.commands.bigchaindb import run_remove_replicas from bigchaindb.backend.exceptions import OperationError args = Namespace(config=None, replicas=['localhost:27017']) @@ -422,29 +498,17 @@ def test_run_remove_replicas(mock_remove_replicas): mock_remove_replicas.reset_mock() # test add_replicas with `OperationError` - mock_remove_replicas.side_effect = OperationError() - assert run_remove_replicas(args) is None + mock_remove_replicas.side_effect = OperationError('err') + with pytest.raises(SystemExit) as exc: + run_remove_replicas(args) + assert exc.value.args == ('err',) assert mock_remove_replicas.call_count == 1 mock_remove_replicas.reset_mock() # test add_replicas with `NotImplementedError` - mock_remove_replicas.side_effect = NotImplementedError() - assert run_remove_replicas(args) is None + mock_remove_replicas.side_effect = NotImplementedError('err') + with pytest.raises(SystemExit) as exc: + run_remove_replicas(args) + assert exc.value.args == ('err',) assert mock_remove_replicas.call_count == 1 mock_remove_replicas.reset_mock() - - -def test_mongodb_host_type(): - from bigchaindb.commands.utils import mongodb_host - - # bad port provided - with pytest.raises(ArgumentTypeError): - mongodb_host('localhost:11111111111') - - # no port information provided - with pytest.raises(ArgumentTypeError): - mongodb_host('localhost') - - # bad host provided - with pytest.raises(ArgumentTypeError): - mongodb_host(':27017') diff --git a/tests/commands/test_utils.py b/tests/commands/test_utils.py new file mode 100644 index 00000000..0ddec6ef --- /dev/null +++ b/tests/commands/test_utils.py @@ -0,0 +1,138 @@ +import argparse +from argparse import ArgumentTypeError, Namespace +import logging + +import pytest + +from unittest.mock import patch + + +@pytest.fixture +def reset_bigchaindb_config(monkeypatch): + import 
bigchaindb + monkeypatch.setattr('bigchaindb.config', bigchaindb._config) + + +def test_input_on_stderr(): + from bigchaindb.commands.utils import input_on_stderr, _convert + + with patch('builtins.input', return_value='I love cats'): + assert input_on_stderr() == 'I love cats' + + # input_on_stderr uses `_convert` internally, from now on we will + # just use that function + + assert _convert('hack the planet') == 'hack the planet' + assert _convert('42') == '42' + assert _convert('42', default=10) == 42 + assert _convert('', default=10) == 10 + assert _convert('42', convert=int) == 42 + assert _convert('True', convert=bool) is True + assert _convert('False', convert=bool) is False + assert _convert('t', convert=bool) is True + assert _convert('3.14', default=1.0) == 3.14 + assert _convert('TrUe', default=False) is True + + with pytest.raises(ValueError): + assert _convert('TRVE', default=False) + + with pytest.raises(ValueError): + assert _convert('ಠ_ಠ', convert=int) + + +@pytest.mark.usefixtures('ignore_local_config_file', 'reset_bigchaindb_config') +def test_configure_bigchaindb_configures_bigchaindb(): + from bigchaindb.commands.utils import configure_bigchaindb + from bigchaindb.config_utils import is_configured + assert not is_configured() + + @configure_bigchaindb + def test_configure(args): + assert is_configured() + + args = Namespace(config=None) + test_configure(args) + + +@pytest.mark.usefixtures('ignore_local_config_file', + 'reset_bigchaindb_config', + 'reset_logging_config') +@pytest.mark.parametrize('log_level', tuple(map( + logging.getLevelName, + (logging.DEBUG, + logging.INFO, + logging.WARNING, + logging.ERROR, + logging.CRITICAL) +))) +def test_configure_bigchaindb_logging(log_level): + from bigchaindb.commands.utils import configure_bigchaindb + + @configure_bigchaindb + def test_configure_logger(args): + pass + + args = Namespace(config=None, log_level=log_level) + test_configure_logger(args) + from bigchaindb import config + assert config['log']['level_console'] == log_level + assert config['log']['level_logfile'] == log_level + + +def test_start_raises_if_command_not_implemented(): + from bigchaindb.commands import utils + from bigchaindb.commands.bigchaindb import create_parser + + parser = create_parser() + + with pytest.raises(NotImplementedError): + # Will raise because `scope`, the third parameter, + # doesn't contain the function `run_start` + utils.start(parser, ['start'], {}) + + +def test_start_raises_if_no_arguments_given(): + from bigchaindb.commands import utils + from bigchaindb.commands.bigchaindb import create_parser + + parser = create_parser() + + with pytest.raises(SystemExit): + utils.start(parser, [], {}) + + +@patch('multiprocessing.cpu_count', return_value=42) +def test_start_sets_multiprocess_var_based_on_cli_args(mock_cpu_count): + from bigchaindb.commands import utils + + def run_mp_arg_test(args): + return args + + parser = argparse.ArgumentParser() + subparser = parser.add_subparsers(title='Commands', + dest='command') + mp_arg_test_parser = subparser.add_parser('mp_arg_test') + mp_arg_test_parser.add_argument('-m', '--multiprocess', + nargs='?', + type=int, + default=False) + + scope = {'run_mp_arg_test': run_mp_arg_test} + assert utils.start(parser, ['mp_arg_test'], scope).multiprocess == 1 + assert utils.start(parser, ['mp_arg_test', '--multiprocess'], scope).multiprocess == 42 + + +def test_mongodb_host_type(): + from bigchaindb.commands.utils import mongodb_host + + # bad port provided + with pytest.raises(ArgumentTypeError): + 
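# mongodb_host is used as an argparse `type=` callable: it must return the
# parsed value or raise ArgumentTypeError, which is what the three
# assertions here exercise. A minimal sketch of such a validator
# (illustrative only; the real parsing rules may differ):
from argparse import ArgumentTypeError

def parse_mongodb_host(value):
    host, _, port = value.rpartition(':')
    if not host:
        # Covers both ':27017' and a bare 'localhost' (no colon at all,
        # so rpartition leaves the host part empty).
        raise ArgumentTypeError('expected HOST:PORT, got {!r}'.format(value))
    try:
        port = int(port)
    except ValueError:
        raise ArgumentTypeError('port must be an integer')
    if not 0 < port < 65536:
        raise ArgumentTypeError('port {} is out of range'.format(port))
    return host, port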
mongodb_host('localhost:11111111111') + + # no port information provided + with pytest.raises(ArgumentTypeError): + mongodb_host('localhost') + + # bad host provided + with pytest.raises(ArgumentTypeError): + mongodb_host(':27017') diff --git a/tests/common/schema/test_schema.py b/tests/common/schema/test_schema.py index 02a00ee2..3116fa7d 100644 --- a/tests/common/schema/test_schema.py +++ b/tests/common/schema/test_schema.py @@ -1,5 +1,5 @@ from bigchaindb.common.schema import ( - TX_SCHEMA, VOTE_SCHEMA, drop_schema_descriptions) + TX_SCHEMA_COMMON, VOTE_SCHEMA, drop_schema_descriptions) def _test_additionalproperties(node, path=''): @@ -19,7 +19,7 @@ def _test_additionalproperties(node, path=''): def test_transaction_schema_additionalproperties(): - _test_additionalproperties(TX_SCHEMA) + _test_additionalproperties(TX_SCHEMA_COMMON) def test_vote_schema_additionalproperties(): diff --git a/tests/common/schema/test_transaction_schema.py b/tests/common/schema/test_transaction_schema.py index c9545ab3..dca10e70 100644 --- a/tests/common/schema/test_transaction_schema.py +++ b/tests/common/schema/test_transaction_schema.py @@ -29,3 +29,32 @@ def test_validate_fails_metadata_empty_dict(create_tx): create_tx.metadata = {} with raises(SchemaValidationError): validate_transaction_schema(create_tx.to_dict()) + + +def test_transfer_asset_schema(signed_transfer_tx): + tx = signed_transfer_tx.to_dict() + validate_transaction_schema(tx) + tx['asset']['data'] = {} + with raises(SchemaValidationError): + validate_transaction_schema(tx) + del tx['asset']['data'] + tx['asset']['id'] = 'b' * 63 + with raises(SchemaValidationError): + validate_transaction_schema(tx) + + +def test_create_single_input(create_tx): + tx = create_tx.to_dict() + tx['inputs'] += tx['inputs'] + with raises(SchemaValidationError): + validate_transaction_schema(tx) + tx['inputs'] = [] + with raises(SchemaValidationError): + validate_transaction_schema(tx) + + +def test_create_tx_no_fulfills(create_tx): + tx = create_tx.to_dict() + tx['inputs'][0]['fulfills'] = {'tx': 'a' * 64, 'output': 0} + with raises(SchemaValidationError): + validate_transaction_schema(tx) diff --git a/tests/common/test_transaction.py b/tests/common/test_transaction.py index 12c71497..adc6e60d 100644 --- a/tests/common/test_transaction.py +++ b/tests/common/test_transaction.py @@ -352,6 +352,17 @@ def test_tx_serialization_with_incorrect_hash(utx): utx_dict.pop('id') +def test_tx_serialization_hash_function(tx): + import sha3 + import json + tx_dict = tx.to_dict() + tx_dict['inputs'][0]['fulfillment'] = None + del tx_dict['id'] + payload = json.dumps(tx_dict, skipkeys=False, sort_keys=True, + separators=(',', ':')) + assert sha3.sha3_256(payload.encode()).hexdigest() == tx.id + + def test_invalid_input_initialization(user_input, user_pub): from bigchaindb.common.transaction import Input @@ -445,12 +456,15 @@ def test_transaction_link_eq(): def test_add_input_to_tx(user_input, asset_definition): from bigchaindb.common.transaction import Transaction + from .utils import validate_transaction_model tx = Transaction(Transaction.CREATE, asset_definition, [], []) tx.add_input(user_input) assert len(tx.inputs) == 1 + validate_transaction_model(tx) + def test_add_input_to_tx_with_invalid_parameters(asset_definition): from bigchaindb.common.transaction import Transaction @@ -460,11 +474,11 @@ def test_add_input_to_tx_with_invalid_parameters(asset_definition): tx.add_input('somewronginput') -def test_add_output_to_tx(user_output, asset_definition): +def 
test_add_output_to_tx(user_output, user_input, asset_definition): from bigchaindb.common.transaction import Transaction from .utils import validate_transaction_model - tx = Transaction(Transaction.CREATE, asset_definition) + tx = Transaction(Transaction.CREATE, asset_definition, [user_input]) tx.add_output(user_output) assert len(tx.outputs) == 1 @@ -544,38 +558,6 @@ def test_validate_input_with_invalid_parameters(utx): assert not valid -def test_validate_multiple_inputs(user_input, user_output, user_priv, - asset_definition): - from copy import deepcopy - - from bigchaindb.common.crypto import PrivateKey - from bigchaindb.common.transaction import Transaction - from .utils import validate_transaction_model - - tx = Transaction(Transaction.CREATE, asset_definition, - [user_input, deepcopy(user_input)], - [user_output, deepcopy(user_output)]) - - expected_first = deepcopy(tx) - expected_second = deepcopy(tx) - - expected_first_bytes = ('0:' + str(tx)).encode() - expected_first.inputs[0].fulfillment.sign(expected_first_bytes, - PrivateKey(user_priv)) - expected_second_bytes = ('1:' + str(tx)).encode() - expected_second.inputs[0].fulfillment.sign(expected_second_bytes, - PrivateKey(user_priv)) - tx.sign([user_priv]) - - assert tx.inputs[0].to_dict()['fulfillment'] == \ - expected_first.inputs[0].fulfillment.serialize_uri() - assert tx.inputs[1].to_dict()['fulfillment'] == \ - expected_second.inputs[0].fulfillment.serialize_uri() - assert tx.inputs_valid() is True - - validate_transaction_model(tx) - - def test_validate_tx_threshold_create_signature(user_user2_threshold_input, user_user2_threshold_output, user_pub, @@ -607,6 +589,44 @@ def test_validate_tx_threshold_create_signature(user_user2_threshold_input, validate_transaction_model(tx) +import pytest +@pytest.mark.skip() +def test_validate_tx_threshold_duplicated_pk(user_pub, user_priv, + asset_definition): + from copy import deepcopy + from cryptoconditions import Ed25519Fulfillment, ThresholdSha256Fulfillment + from bigchaindb.common.transaction import Input, Output, Transaction + from bigchaindb.common.crypto import PrivateKey + + threshold = ThresholdSha256Fulfillment(threshold=2) + threshold.add_subfulfillment(Ed25519Fulfillment(public_key=user_pub)) + threshold.add_subfulfillment(Ed25519Fulfillment(public_key=user_pub)) + + threshold_input = Input(threshold, [user_pub, user_pub]) + threshold_output = Output(threshold, [user_pub, user_pub]) + + tx = Transaction(Transaction.CREATE, asset_definition, + [threshold_input], [threshold_output]) + expected = deepcopy(threshold_input) + expected.fulfillment.subconditions[0]['body'].sign(str(tx).encode(), + PrivateKey(user_priv)) + expected.fulfillment.subconditions[1]['body'].sign(str(tx).encode(), + PrivateKey(user_priv)) + + tx.sign([user_priv, user_priv]) + + subconditions = tx.inputs[0].fulfillment.subconditions + expected_subconditions = expected.fulfillment.subconditions + assert subconditions[0]['body'].to_dict()['signature'] == \ + expected_subconditions[0]['body'].to_dict()['signature'] + assert subconditions[1]['body'].to_dict()['signature'] == \ + expected_subconditions[1]['body'].to_dict()['signature'] + + assert tx.inputs[0].to_dict()['fulfillment'] == \ + expected.fulfillment.serialize_uri() + assert tx.inputs_valid() is True + + def test_multiple_input_validation_of_transfer_tx(user_input, user_output, user_priv, user2_pub, user2_priv, user3_pub, @@ -618,8 +638,7 @@ def test_multiple_input_validation_of_transfer_tx(user_input, user_output, from cryptoconditions import 
Ed25519Fulfillment from .utils import validate_transaction_model - tx = Transaction(Transaction.CREATE, asset_definition, - [user_input, deepcopy(user_input)], + tx = Transaction(Transaction.CREATE, asset_definition, [user_input], [user_output, deepcopy(user_output)]) tx.sign([user_priv]) @@ -982,3 +1001,20 @@ def test_validate_version(utx): utx.version = '1.0.0' with raises(SchemaValidationError): validate_transaction_model(utx) + + +def test_create_tx_no_asset_id(b, utx): + from bigchaindb.common.exceptions import SchemaValidationError + from .utils import validate_transaction_model + utx.asset['id'] = 'b' * 64 + with raises(SchemaValidationError): + validate_transaction_model(utx) + + +def test_transfer_tx_asset_schema(transfer_utx): + from bigchaindb.common.exceptions import SchemaValidationError + from .utils import validate_transaction_model + tx = transfer_utx + tx.asset['data'] = {} + with raises(SchemaValidationError): + validate_transaction_model(tx) diff --git a/tests/conftest.py b/tests/conftest.py index 9612f38b..26beac11 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -12,6 +12,8 @@ import random import pytest +from logging import getLogger +from logging.config import dictConfig from bigchaindb.common import crypto TEST_DB_NAME = 'bigchain_test' @@ -203,6 +205,15 @@ def ignore_local_config_file(monkeypatch): mock_file_config) +@pytest.fixture +def reset_logging_config(): + # root_logger_level = getLogger().level + root_logger_level = 'DEBUG' + dictConfig({'version': 1, 'root': {'level': 'NOTSET'}}) + yield + getLogger().setLevel(root_logger_level) + + @pytest.fixture def user_sk(): return USER_PRIVATE_KEY @@ -223,6 +234,54 @@ def user2_pk(): return USER2_PK +@pytest.fixture +def alice(): + from bigchaindb.common.crypto import generate_key_pair + return generate_key_pair() + + +@pytest.fixture +def alice_privkey(alice): + return alice.private_key + + +@pytest.fixture +def alice_pubkey(alice): + return alice.public_key + + +@pytest.fixture +def bob(): + from bigchaindb.common.crypto import generate_key_pair + return generate_key_pair() + + +@pytest.fixture +def bob_privkey(bob): + return bob.private_key + + +@pytest.fixture +def bob_pubkey(bob): + return bob.public_key + + +@pytest.fixture +def carol(): + from bigchaindb.common.crypto import generate_key_pair + return generate_key_pair() + + +@pytest.fixture +def carol_privkey(carol): + return carol.private_key + + +@pytest.fixture +def carol_pubkey(carol): + return carol.public_key + + @pytest.fixture def b(): from bigchaindb import Bigchain @@ -383,3 +442,15 @@ def db_name(db_config): def db_conn(): from bigchaindb.backend import connect return connect() + + +@pytest.fixture +def mocked_setup_pub_logger(mocker): + return mocker.patch( + 'bigchaindb.log.setup.setup_pub_logger', autospec=True, spec_set=True) + + +@pytest.fixture +def mocked_setup_sub_logger(mocker): + return mocker.patch( + 'bigchaindb.log.setup.setup_sub_logger', autospec=True, spec_set=True) diff --git a/tests/db/test_bigchain_api.py b/tests/db/test_bigchain_api.py index 96779e60..3f05385c 100644 --- a/tests/db/test_bigchain_api.py +++ b/tests/db/test_bigchain_api.py @@ -82,16 +82,16 @@ class TestBigchainApi(object): block = b.create_block([tx]) b.write_block(block) - assert b.has_previous_vote(block.id, block.voters) is False + assert b.has_previous_vote(block.id) is False vote = b.vote(block.id, b.get_last_voted_block().id, True) b.write_vote(vote) - assert b.has_previous_vote(block.id, block.voters) is True + assert
b.has_previous_vote(block.id) is True @pytest.mark.genesis def test_get_spent_with_double_inclusion_detected(self, b, monkeypatch): - from bigchaindb.backend.exceptions import BigchainDBCritical + from bigchaindb.exceptions import CriticalDoubleInclusion from bigchaindb.models import Transaction tx = Transaction.create([b.me], [([b.me], 1)]) @@ -121,12 +121,47 @@ class TestBigchainApi(object): vote = b.vote(block3.id, b.get_last_voted_block().id, True) b.write_vote(vote) - with pytest.raises(BigchainDBCritical): + with pytest.raises(CriticalDoubleInclusion): + b.get_spent(tx.id, 0) + + @pytest.mark.genesis + def test_get_spent_with_double_spend_detected(self, b, monkeypatch): + from bigchaindb.exceptions import CriticalDoubleSpend + from bigchaindb.models import Transaction + + tx = Transaction.create([b.me], [([b.me], 1)]) + tx = tx.sign([b.me_private]) + + monkeypatch.setattr('time.time', lambda: 1000000000) + block1 = b.create_block([tx]) + b.write_block(block1) + + monkeypatch.setattr('time.time', lambda: 1000000020) + transfer_tx = Transaction.transfer(tx.to_inputs(), [([b.me], 1)], + asset_id=tx.id) + transfer_tx = transfer_tx.sign([b.me_private]) + block2 = b.create_block([transfer_tx]) + b.write_block(block2) + + monkeypatch.setattr('time.time', lambda: 1000000030) + transfer_tx2 = Transaction.transfer(tx.to_inputs(), [([b.me], 2)], + asset_id=tx.id) + transfer_tx2 = transfer_tx2.sign([b.me_private]) + block3 = b.create_block([transfer_tx2]) + b.write_block(block3) + + # Vote both block2 and block3 valid + vote = b.vote(block2.id, b.get_last_voted_block().id, True) + b.write_vote(vote) + vote = b.vote(block3.id, b.get_last_voted_block().id, True) + b.write_vote(vote) + + with pytest.raises(CriticalDoubleSpend): b.get_spent(tx.id, 0) @pytest.mark.genesis def test_get_block_status_for_tx_with_double_inclusion(self, b, monkeypatch): - from bigchaindb.backend.exceptions import BigchainDBCritical + from bigchaindb.exceptions import CriticalDoubleInclusion from bigchaindb.models import Transaction tx = Transaction.create([b.me], [([b.me], 1)]) @@ -146,7 +181,7 @@ class TestBigchainApi(object): vote = b.vote(block2.id, b.get_last_voted_block().id, True) b.write_vote(vote) - with pytest.raises(BigchainDBCritical): + with pytest.raises(CriticalDoubleInclusion): b.get_blocks_status_containing_tx(tx.id) @pytest.mark.genesis @@ -411,75 +446,6 @@ class TestBigchainApi(object): b.write_vote(b.vote(block_3.id, b.get_last_voted_block().id, True)) assert b.get_last_voted_block().id == block_3.id - def test_no_vote_written_if_block_already_has_vote(self, b, genesis_block): - from bigchaindb.models import Block - - block_1 = dummy_block() - b.write_block(block_1) - - b.write_vote(b.vote(block_1.id, genesis_block.id, True)) - retrieved_block_1 = b.get_block(block_1.id) - retrieved_block_1 = Block.from_dict(retrieved_block_1) - - # try to vote again on the retrieved block, should do nothing - b.write_vote(b.vote(retrieved_block_1.id, genesis_block.id, True)) - retrieved_block_2 = b.get_block(block_1.id) - retrieved_block_2 = Block.from_dict(retrieved_block_2) - - assert retrieved_block_1 == retrieved_block_2 - - @pytest.mark.genesis - def test_more_votes_than_voters(self, b): - from bigchaindb.common.exceptions import MultipleVotesError - - block_1 = dummy_block() - b.write_block(block_1) - # insert duplicate votes - vote_1 = b.vote(block_1.id, b.get_last_voted_block().id, True) - vote_2 = b.vote(block_1.id, b.get_last_voted_block().id, True) - vote_2['node_pubkey'] = 'aaaaaaa' - b.write_vote(vote_1) - 
b.write_vote(vote_2) - - with pytest.raises(MultipleVotesError) as excinfo: - b.block_election_status(block_1.id, block_1.voters) - assert excinfo.value.args[0] == 'Block {block_id} has {n_votes} votes cast, but only {n_voters} voters'\ - .format(block_id=block_1.id, n_votes=str(2), n_voters=str(1)) - - def test_multiple_votes_single_node(self, b, genesis_block): - from bigchaindb.common.exceptions import MultipleVotesError - - block_1 = dummy_block() - b.write_block(block_1) - # insert duplicate votes - for i in range(2): - b.write_vote(b.vote(block_1.id, genesis_block.id, True)) - - with pytest.raises(MultipleVotesError) as excinfo: - b.block_election_status(block_1.id, block_1.voters) - assert excinfo.value.args[0] == 'Block {block_id} has multiple votes ({n_votes}) from voting node {node_id}'\ - .format(block_id=block_1.id, n_votes=str(2), node_id=b.me) - - with pytest.raises(MultipleVotesError) as excinfo: - b.has_previous_vote(block_1.id, block_1.voters) - assert excinfo.value.args[0] == 'Block {block_id} has {n_votes} votes from public key {me}'\ - .format(block_id=block_1.id, n_votes=str(2), me=b.me) - - @pytest.mark.genesis - def test_improper_vote_error(selfs, b): - from bigchaindb.common.exceptions import ImproperVoteError - - block_1 = dummy_block() - b.write_block(block_1) - vote_1 = b.vote(block_1.id, b.get_last_voted_block().id, True) - # mangle the signature - vote_1['signature'] = 'a' * 87 - b.write_vote(vote_1) - with pytest.raises(ImproperVoteError) as excinfo: - b.has_previous_vote(block_1.id, block_1.id) - assert excinfo.value.args[0] == 'Block {block_id} already has an incorrectly signed ' \ - 'vote from public key {me}'.format(block_id=block_1.id, me=b.me) - @pytest.mark.usefixtures('inputs') def test_assign_transaction_one_node(self, b, user_pk, user_sk): from bigchaindb.backend import query @@ -530,7 +496,7 @@ class TestBigchainApi(object): @pytest.mark.usefixtures('inputs') def test_non_create_input_not_found(self, b, user_pk): from cryptoconditions import Ed25519Fulfillment - from bigchaindb.common.exceptions import TransactionDoesNotExist + from bigchaindb.common.exceptions import InputDoesNotExist from bigchaindb.common.transaction import Input, TransactionLink from bigchaindb.models import Transaction from bigchaindb import Bigchain @@ -542,7 +508,7 @@ class TestBigchainApi(object): tx = Transaction.transfer([input], [([user_pk], 1)], asset_id='mock_asset_link') - with pytest.raises(TransactionDoesNotExist): + with pytest.raises(InputDoesNotExist): tx.validate(Bigchain()) def test_count_backlog(self, b, user_pk): @@ -559,30 +525,12 @@ class TestBigchainApi(object): class TestTransactionValidation(object): - def test_create_operation_with_inputs(self, b, user_pk, create_tx): - from bigchaindb.common.transaction import TransactionLink - - # Manipulate input so that it has a `fulfills` defined even - # though it shouldn't have one - create_tx.inputs[0].fulfills = TransactionLink('abc', 0) - with pytest.raises(ValueError) as excinfo: - b.validate_transaction(create_tx) - assert excinfo.value.args[0] == 'A CREATE operation has no inputs' - - def test_transfer_operation_no_inputs(self, b, user_pk, - signed_transfer_tx): - signed_transfer_tx.inputs[0].fulfills = None - with pytest.raises(ValueError) as excinfo: - b.validate_transaction(signed_transfer_tx) - - assert excinfo.value.args[0] == 'Only `CREATE` transactions can have null inputs' - def test_non_create_input_not_found(self, b, user_pk, signed_transfer_tx): - from bigchaindb.common.exceptions import 
TransactionDoesNotExist + from bigchaindb.common.exceptions import InputDoesNotExist from bigchaindb.common.transaction import TransactionLink signed_transfer_tx.inputs[0].fulfills = TransactionLink('c', 0) - with pytest.raises(TransactionDoesNotExist): + with pytest.raises(InputDoesNotExist): b.validate_transaction(signed_transfer_tx) @pytest.mark.usefixtures('inputs') @@ -741,7 +689,7 @@ class TestBlockValidation(object): b.validate_block(block) def test_invalid_node_pubkey(self, b): - from bigchaindb.common.exceptions import OperationError + from bigchaindb.common.exceptions import SybilError from bigchaindb.common import crypto # blocks can only be created by a federation node @@ -758,8 +706,8 @@ class TestBlockValidation(object): # from a non federation node block = block.sign(tmp_sk) - # check that validate_block raises an OperationError - with pytest.raises(OperationError): + # check that validate_block raises a SybilError + with pytest.raises(SybilError): b.validate_block(block) @@ -778,7 +726,7 @@ class TestMultipleInputs(object): tx = tx.sign([user_sk]) # validate transaction - assert b.is_valid_transaction(tx) == tx + tx.validate(b) assert len(tx.inputs) == 1 assert len(tx.outputs) == 1 @@ -800,7 +748,7 @@ class TestMultipleInputs(object): asset_id=input_tx.id) tx = tx.sign([user_sk]) - assert b.is_valid_transaction(tx) == tx + tx.validate(b) assert len(tx.inputs) == 1 assert len(tx.outputs) == 1 @@ -832,7 +780,7 @@ class TestMultipleInputs(object): transfer_tx = transfer_tx.sign([user_sk, user2_sk]) # validate transaction - assert b.is_valid_transaction(transfer_tx) == transfer_tx + transfer_tx.validate(b) assert len(transfer_tx.inputs) == 1 assert len(transfer_tx.outputs) == 1 @@ -865,7 +813,7 @@ class TestMultipleInputs(object): asset_id=tx_input.id) tx = tx.sign([user_sk, user2_sk]) - assert b.is_valid_transaction(tx) == tx + tx.validate(b) assert len(tx.inputs) == 1 assert len(tx.outputs) == 1 @@ -1219,7 +1167,6 @@ def test_cant_spend_same_input_twice_in_tx(b, genesis_block): tx_transfer = Transaction.transfer(dup_inputs, [([b.me], 200)], asset_id=tx_create.id) tx_transfer_signed = tx_transfer.sign([b.me_private]) - assert b.is_valid_transaction(tx_transfer_signed) is False with pytest.raises(DoubleSpend): tx_transfer_signed.validate(b) diff --git a/tests/integration/test_integration.py b/tests/integration/test_integration.py index 6597a0e7..2bf0ebcd 100644 --- a/tests/integration/test_integration.py +++ b/tests/integration/test_integration.py @@ -5,27 +5,6 @@ import pytest pytestmark = [pytest.mark.bdb, pytest.mark.usefixtures('processes')] -def test_fast_double_create(b, user_pk): - from bigchaindb.models import Transaction - from bigchaindb.backend.query import count_blocks - tx = Transaction.create([b.me], [([user_pk], 1)], - metadata={'test': 'test'}).sign([b.me_private]) - - # write everything fast - b.write_transaction(tx) - b.write_transaction(tx) - - time.sleep(2) - tx_returned = b.get_transaction(tx.id) - - # test that the tx can be queried - assert tx_returned == tx - # test the transaction appears only once - last_voted_block = b.get_last_voted_block() - assert len(last_voted_block.transactions) == 1 - assert count_blocks(b.connection) == 2 - - def test_double_create(b, user_pk): from bigchaindb.models import Transaction from bigchaindb.backend.query import count_blocks diff --git a/tests/log/test_loggers.py b/tests/log/test_loggers.py new file mode 100644 index 00000000..795de046 --- /dev/null +++ b/tests/log/test_loggers.py @@ -0,0 +1,18 @@ +from
logging.handlers import SocketHandler + + +class TestHttpServerLogger: + + def test_init(self, mocker): + from bigchaindb.log.configs import ( + DEFAULT_SOCKET_LOGGING_ADDR as expected_socket_address) + from bigchaindb.log.loggers import HttpServerLogger + mocked_config = mocker.patch( + 'gunicorn.config.Config', autospec=True, spec_set=True) + logger = HttpServerLogger(mocked_config.return_value) + assert len(logger.access_log.handlers) == 1 + assert len(logger.error_log.handlers) == 1 + assert isinstance(logger.access_log.handlers[0], SocketHandler) + assert isinstance(logger.error_log.handlers[0], SocketHandler) + assert logger.access_log.handlers[0].address == expected_socket_address + assert logger.error_log.handlers[0].address == expected_socket_address diff --git a/tests/log/test_setup.py b/tests/log/test_setup.py new file mode 100644 index 00000000..0e608d26 --- /dev/null +++ b/tests/log/test_setup.py @@ -0,0 +1,321 @@ +import logging +import pickle +from logging import getLogger +from logging.config import dictConfig +from logging.handlers import SocketHandler + +from pytest import fixture, mark, raises + + +@fixture +def reset_logging_config(): + original_root_logger_level = getLogger().level + dictConfig({'version': 1, 'root': {'level': 'NOTSET'}}) + yield + getLogger().setLevel(original_root_logger_level) + + +@fixture +def mocked_process(mocker): + return mocker.patch( + 'bigchaindb.log.setup.Process', autospec=True, spec_set=True) + + +@fixture +def mocked_socket_server(mocker): + return mocker.patch( + 'bigchaindb.log.setup.LogRecordSocketServer', + autospec=True, + spec_set=True, + ) + + +@fixture +def log_record_dict(): + return { + 'args': None, + 'created': 1489584900.595193, + 'exc_info': None, + 'exc_text': None, + 'filename': 'config_utils.py', + 'funcName': 'autoconfigure', + 'levelname': 'DEBUG', + 'levelno': 10, + 'lineno': 228, + 'module': 'config_utils', + 'msecs': 595.1929092407227, + 'msg': 'System already configured, skipping autoconfiguration', + 'name': 'bigchaindb.config_utils', + 'pathname': '/usr/src/app/bigchaindb/config_utils.py', + 'process': 1981, + 'processName': 'MainProcess', + 'relativeCreated': 398.4854221343994, + 'stack_info': None, + 'thread': 140352503879424, + 'threadName': 'MainThread', + } + + +@fixture +def log_record(log_record_dict): + return logging.makeLogRecord(log_record_dict) + + +@fixture +def log_record_bytes(log_record_dict): + return pickle.dumps(log_record_dict) + + +@mark.usefixtures('reset_logging_config') +def test_setup_logging(mocked_setup_pub_logger, mocked_setup_sub_logger): + from bigchaindb.log.setup import setup_logging + setup_logging() + mocked_setup_pub_logger.assert_called_once_with() + mocked_setup_sub_logger.assert_called_once_with(user_log_config=None) + + +@mark.usefixtures('reset_logging_config') +def test_setup_pub_logger(): + from bigchaindb.log.setup import setup_pub_logger + from bigchaindb.log.configs import PUBLISHER_LOGGING_CONFIG + root_logger = getLogger() + assert root_logger.level == logging.NOTSET + setup_pub_logger() + assert root_logger.level == PUBLISHER_LOGGING_CONFIG['root']['level'] + assert root_logger.hasHandlers() + assert isinstance(root_logger.handlers[0], SocketHandler) + + +@mark.usefixtures('reset_logging_config') +def test_setup_sub_logger_without_config(mocked_socket_server, mocked_process): + from bigchaindb.log.setup import setup_sub_logger + setup_sub_logger() + root_logger = getLogger() + assert root_logger.level == logging.NOTSET + 
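# The setup under test splits logging into a publisher and a subscriber:
# every process publishes records through a SocketHandler attached to the
# root logger, and one subscriber process (LogRecordSocketServer) applies
# the real console/file handlers. A minimal sketch of the publisher side,
# assuming only stdlib behaviour (the port default matches the
# DEFAULT_TCP_LOGGING_PORT these tests assert on):
import logging
import logging.handlers

def setup_pub_logger_sketch(host='localhost',
                            port=logging.handlers.DEFAULT_TCP_LOGGING_PORT):
    root = logging.getLogger()
    root.setLevel(logging.DEBUG)
    # SocketHandler pickles each record and ships it to the subscriber;
    # no formatting happens on the publisher side.
    root.addHandler(logging.handlers.SocketHandler(host, port))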
mocked_socket_server.assert_called_once_with() + mocked_process.assert_called_once_with( + target=mocked_socket_server.return_value.serve_forever, + kwargs={'log_config': None}, + ) + mocked_process.return_value.start.assert_called_once_with() + + +@mark.usefixtures('reset_logging_config') +def test_setup_sub_logger_with_config(mocked_socket_server, mocked_process): + from bigchaindb.log.setup import setup_sub_logger + user_log_config = { + 'file': '/var/log/bdb.log', + 'level_console': 'warning', + 'level_logfile': 'info', + 'fmt_console': '[%(levelname)s] (%(name)s) %(message)s', + 'fmt_logfile': '[%(asctime)s] [%(levelname)s] (%(name)s) %(message)s', + 'granular_levels': { + 'bigchaindb.core': 'debug', + }, + } + root_logger = getLogger() + setup_sub_logger(user_log_config=user_log_config) + assert root_logger.level == logging.NOTSET + mocked_socket_server.assert_called_once_with() + mocked_process.assert_called_once_with( + target=mocked_socket_server.return_value.serve_forever, + kwargs={'log_config': user_log_config}, + ) + mocked_process.return_value.start.assert_called_once_with() + + +def test_create_subscriber_logging_config_without_user_given_config(): + from bigchaindb.log.setup import create_subscriber_logging_config + from bigchaindb.log.configs import SUBSCRIBER_LOGGING_CONFIG + config = create_subscriber_logging_config() + assert config == SUBSCRIBER_LOGGING_CONFIG + + +def test_create_subscriber_logging_config_with_user_given_config(): + from bigchaindb.log.setup import create_subscriber_logging_config + from bigchaindb.log.configs import ( + SUBSCRIBER_LOGGING_CONFIG as expected_log_config) + user_log_config = { + 'file': '/var/log/bigchaindb/bdb.log', + 'error_file': '/var/log/bigchaindb/bdb-err.log', + 'level_console': 'warning', + 'level_logfile': 'info', + 'fmt_console': '[%(levelname)s] (%(name)s) %(message)s', + 'fmt_logfile': '[%(asctime)s] [%(levelname)s] (%(name)s) %(message)s', + 'datefmt_console': '%H:%M:%S', + 'datefmt_logfile': '%a, %d %b %Y %H:%M:%S +0000', + 'granular_levels': { + 'bigchaindb.core': 'debug', + }, + } + config = create_subscriber_logging_config(user_log_config=user_log_config) + assert config['root']['level'] == expected_log_config['root']['level'] + assert all(config['loggers'][logger]['level'] == level.upper() + for logger, level in user_log_config['granular_levels'].items()) + assert len(config) == len(expected_log_config) + assert config['version'] == expected_log_config['version'] + assert (config['disable_existing_loggers'] == + expected_log_config['disable_existing_loggers']) + assert (config['formatters']['console']['format'] == + user_log_config['fmt_console']) + assert (config['formatters']['file']['format'] == + user_log_config['fmt_logfile']) + assert (config['formatters']['console']['datefmt'] == + user_log_config['datefmt_console']) + assert (config['formatters']['file']['datefmt'] == + user_log_config['datefmt_logfile']) + assert (config['handlers']['console']['level'] == + user_log_config['level_console'].upper()) + assert (config['handlers']['file']['level'] == + user_log_config['level_logfile'].upper()) + assert config['handlers']['errors']['level'] == logging.ERROR + assert config['handlers']['file']['filename'] == user_log_config['file'] + assert (config['handlers']['errors']['filename'] == + user_log_config['error_file']) + del config['handlers']['console']['level'] + del config['handlers']['file']['level'] + del config['handlers']['file']['filename'] + del config['formatters']['console']['format'] + del 
config['formatters']['console']['datefmt']
+    del config['formatters']['file']['format']
+    del config['formatters']['file']['datefmt']
+    del expected_log_config['handlers']['console']['level']
+    del expected_log_config['handlers']['file']['level']
+    del expected_log_config['handlers']['file']['filename']
+    del expected_log_config['formatters']['console']['format']
+    del expected_log_config['formatters']['console']['datefmt']
+    del expected_log_config['formatters']['file']['format']
+    del expected_log_config['formatters']['file']['datefmt']
+    assert (config['handlers']['console'] ==
+            expected_log_config['handlers']['console'])
+    assert (config['handlers']['file'] ==
+            expected_log_config['handlers']['file'])
+    assert (config['formatters']['console'] ==
+            expected_log_config['formatters']['console'])
+    assert (config['formatters']['file'] ==
+            expected_log_config['formatters']['file'])
+
+
+def test_normalize_log_level():
+    from bigchaindb.common.exceptions import ConfigurationError
+    from bigchaindb.log.setup import _normalize_log_level
+    with raises(ConfigurationError) as exc:
+        _normalize_log_level(2)
+    assert exc.value.args == ('Log level must be a string!',)
+    assert isinstance(exc.value.__cause__, AttributeError)
+    assert exc.value.__cause__.args == (
+        "'int' object has no attribute 'upper'",)
+
+
+class TestLogRecordSocketServer:
+
+    def test_init(self):
+        from bigchaindb.log.setup import (LogRecordSocketServer,
+                                          LogRecordStreamHandler)
+        server = LogRecordSocketServer()
+        assert server.allow_reuse_address
+        assert server.server_address == (
+            '127.0.0.1', logging.handlers.DEFAULT_TCP_LOGGING_PORT)
+        assert server.RequestHandlerClass == LogRecordStreamHandler
+        server.server_close()
+
+    @mark.parametrize('side_effect', (None, KeyboardInterrupt))
+    def test_server_forever(self, mocker, side_effect):
+        from bigchaindb.log.setup import LogRecordSocketServer
+        mocked_create_subscriber_logging_config = mocker.patch(
+            'bigchaindb.log.setup.create_subscriber_logging_config',
+            autospec=True,
+            spec_set=True,
+        )
+        mocked_dict_config = mocker.patch('bigchaindb.log.setup.dictConfig',
+                                          autospec=True, spec_set=True)
+        mocked_parent_serve_forever = mocker.patch(
+            'bigchaindb.log.setup.ThreadingTCPServer.serve_forever',
+            autospec=True,
+            spec_set=True,
+            side_effect=side_effect,
+        )
+        server = LogRecordSocketServer()
+        with server:
+            server.serve_forever()
+        mocked_create_subscriber_logging_config.assert_called_once_with(
+            user_log_config=None)
+        mocked_dict_config.assert_called_once_with(
+            mocked_create_subscriber_logging_config.return_value)
+        mocked_parent_serve_forever.assert_called_once_with(server,
+                                                            poll_interval=0.5)
+
+
+class TestLogRecordStreamHandler:
+
+    def test_handle(self, mocker, log_record_dict, log_record_bytes):
+        from bigchaindb.log.setup import LogRecordStreamHandler
+
+        # chunks are consumed with pop(), i.e. from the right: the handler
+        # first receives the four-byte length prefix, then the pickled record
+        chunks = [log_record_bytes, b'\x00\x00\x02T']
+        mocked_handle_log_record = mocker.patch(
+            'bigchaindb.log.setup.LogRecordStreamHandler.handle_log_record',
+            autospec=True,
+            spec_set=True,
+        )
+
+        def mocked_recv(bufsize):
+            try:
+                return chunks.pop()
+            except IndexError:
+                return b' '
+
+        request = mocker.patch('socket.socket', autospec=True, spec_set=True)
+        request.return_value.recv = mocked_recv
+        client_address = ('127.0.0.1', 9020)
+        LogRecordStreamHandler(
+            request.return_value, client_address, None)
+        assert mocked_handle_log_record.called
+        assert (mocked_handle_log_record.call_args[0][1].__dict__ ==
+                log_record_dict)
+
+    def test_handle_log_record(self, mocker, log_record):
+        from
bigchaindb.log.setup import LogRecordStreamHandler + mocker.patch('bigchaindb.log.setup.LogRecordStreamHandler.handle') + mocked_logger_handle = mocker.patch( + 'bigchaindb.log.setup.logging.Logger.handle', + autospec=True, spec_set=True) + request = mocker.patch('socket.socket', autospec=True, spec_set=True) + client_address = ('127.0.0.1', 9020) + handler = LogRecordStreamHandler( + request.return_value, client_address, None) + handler.handle_log_record(log_record) + assert log_record in mocked_logger_handle.call_args[0] + + def test_unpickle(self, mocker, log_record_bytes, log_record_dict): + from bigchaindb.log.setup import LogRecordStreamHandler + mocker.patch('bigchaindb.log.setup.LogRecordStreamHandler.handle') + request = mocker.patch('socket.socket', autospec=True, spec_set=True) + client_address = ('127.0.0.1', 9020) + handler = LogRecordStreamHandler( + request.return_value, client_address, None) + obj = handler.unpickle(log_record_bytes) + assert obj == log_record_dict + + @mark.parametrize('error', ( + pickle.UnpicklingError, AttributeError, EOFError, TypeError)) + def test_unpickle_error(self, mocker, error): + from bigchaindb.log.setup import LogRecordStreamHandler + mocker.patch('bigchaindb.log.setup.LogRecordStreamHandler.handle') + mocker.patch( + 'bigchaindb.log.setup.pickle.loads', + autospec=True, + spec_set=True, + side_effect=error('msg'), + ) + request = mocker.patch('socket.socket', autospec=True, spec_set=True) + client_address = ('127.0.0.1', 9020) + handler = LogRecordStreamHandler( + request.return_value, client_address, None) + obj = handler.unpickle(None) + assert obj == { + 'msg': '({}) Log handling error: un-pickling failed!'.format( + error.__name__), + 'exc_info': ('msg',), + 'level': logging.ERROR, + 'func': handler.unpickle.__name__, + } diff --git a/tests/pipelines/stepping.py b/tests/pipelines/stepping.py index 0e286829..030863c6 100644 --- a/tests/pipelines/stepping.py +++ b/tests/pipelines/stepping.py @@ -72,6 +72,7 @@ class MultipipesStepper: r = f(**kwargs) if r is not None: self._enqueue(next_name, r) + return r self.tasks[name] = functools.wraps(f)(inner) self.input_tasks.add(name) @@ -90,6 +91,7 @@ class MultipipesStepper: out = f(*args, **kwargs) if out is not None and next: self._enqueue(next_name, out) + return out task = functools.wraps(f)(inner) self.tasks[name] = task @@ -111,12 +113,12 @@ class MultipipesStepper: logging.debug('Stepping %s', name) task = self.tasks[name] if name in self.input_tasks: - task(**kwargs) + return task(**kwargs) else: queue = self.queues.get(name, []) if not queue: raise Empty(name) - task(*queue.pop(0), **kwargs) + return task(*queue.pop(0), **kwargs) logging.debug('Stepped %s', name) @property diff --git a/tests/pipelines/test_block_creation.py b/tests/pipelines/test_block_creation.py index 2991f3cf..27efc65d 100644 --- a/tests/pipelines/test_block_creation.py +++ b/tests/pipelines/test_block_creation.py @@ -46,28 +46,19 @@ def test_validate_transaction_handles_exceptions(b, signed_create_tx): """ from bigchaindb.pipelines.block import BlockPipeline block_maker = BlockPipeline() + from bigchaindb.common.exceptions import ValidationError - # Test SchemaValidationError tx_dict = signed_create_tx.to_dict() - tx_dict['invalid_key'] = 'schema validation gonna getcha!' 
-    assert block_maker.validate_tx(tx_dict) is None
-    # Test InvalidHash
-    tx_dict = signed_create_tx.to_dict()
-    tx_dict['id'] = 'a' * 64
-    assert block_maker.validate_tx(tx_dict) is None
+    with patch('bigchaindb.models.Transaction.validate') as validate:
+        # Assert that ValidationError gets caught
+        validate.side_effect = ValidationError()
+        assert block_maker.validate_tx(tx_dict) is None

-    # Test InvalidSignature when we pass a bad fulfillment
-    tx_dict = signed_create_tx.to_dict()
-    tx_dict['inputs'][0]['fulfillment'] = 'cf:0:aaaaaaaaaaaaaaaaaaaaaaaaa'
-    assert block_maker.validate_tx(tx_dict) is None
-
-    # Test AmountError
-    signed_create_tx.outputs[0].amount = 0
-    tx_dict = signed_create_tx.to_dict()
-    # set the correct value back so that we can continue using it
-    signed_create_tx.outputs[0].amount = 1
-    assert block_maker.validate_tx(tx_dict) is None
+        # Assert that another error doesn't get caught
+        validate.side_effect = IOError()
+        with pytest.raises(IOError):
+            block_maker.validate_tx(tx_dict)


 def test_create_block(b, user_pk):
@@ -226,3 +217,12 @@ def test_full_pipeline(b, user_pk):
     block_len = len(block_doc.transactions)
     assert chained_block == block_doc
     assert number_assigned_to_others == 100 - block_len
+
+
+def test_block_snowflake(create_tx, signed_transfer_tx):
+    from bigchaindb.pipelines.block import tx_collector
+    snowflake = tx_collector()
+    assert snowflake.send(create_tx) == [create_tx]
+    snowflake.send(signed_transfer_tx)
+    snowflake.send(create_tx)
+    assert snowflake.send(None) == [create_tx, signed_transfer_tx]
diff --git a/tests/pipelines/test_election.py b/tests/pipelines/test_election.py
index 5cf6fc14..f0dd232d 100644
--- a/tests/pipelines/test_election.py
+++ b/tests/pipelines/test_election.py
@@ -83,12 +83,6 @@ def test_check_for_quorum_invalid_prev_node(b, user_pk):
 def test_check_for_quorum_valid(b, user_pk):
     from bigchaindb.models import Transaction

-    e = election.Election()
-
-    # create blocks with transactions
-    tx1 = Transaction.create([b.me], [([user_pk], 1)])
-    test_block = b.create_block([tx1])
-
     # simulate a federation with four voters
     key_pairs = [crypto.generate_key_pair() for _ in range(4)]
     test_federation = [
@@ -96,8 +90,13 @@
         for key_pair in key_pairs
     ]

+    b.nodes_except_me = [key_pair[1] for key_pair in key_pairs]
+
+    # create blocks with transactions
+    tx1 = Transaction.create([b.me], [([user_pk], 1)])
+    test_block = b.create_block([tx1])
+
     # add voters to block and write
-    test_block.voters = [key_pair[1] for key_pair in key_pairs]
     test_block = test_block.sign(b.me_private)
     b.write_block(test_block)

@@ -108,10 +107,20 @@
     for vote in votes:
         b.write_vote(vote)

+    e = election.Election()
+    e.bigchain = b
+
     # since this block is valid, should go nowhere
     assert e.check_for_quorum(votes[-1]) is None


+@patch('bigchaindb.core.Bigchain.get_block')
+def test_invalid_vote(get_block, b):
+    e = election.Election()
+    assert e.check_for_quorum({}) is None
+    get_block.assert_not_called()
+
+
 @pytest.mark.bdb
 def test_check_requeue_transaction(b, user_pk):
     from bigchaindb.models import Transaction
@@ -190,3 +199,27 @@ def test_full_pipeline(b, user_pk):
     tx_from_block = set([tx.id for tx in invalid_block.transactions])
     tx_from_backlog = set([tx['id'] for tx in list(query.get_stale_transactions(b.connection, 0))])
     assert tx_from_block == tx_from_backlog
+
+
+def test_handle_block_events():
+    from bigchaindb.events import setup_events_queue, EventTypes
+
+    events_queue =
setup_events_queue() + e = election.Election(events_queue=events_queue) + block_id = 'a' * 64 + + assert events_queue.qsize() == 0 + + # no event should be emitted in case a block is undecided + e.handle_block_events({'status': Bigchain.BLOCK_UNDECIDED}, block_id) + assert events_queue.qsize() == 0 + + # put an invalid block event in the queue + e.handle_block_events({'status': Bigchain.BLOCK_INVALID}, block_id) + event = e.event_handler.get_event() + assert event.type == EventTypes.BLOCK_INVALID + + # put a valid block event in the queue + e.handle_block_events({'status': Bigchain.BLOCK_VALID}, block_id) + event = e.event_handler.get_event() + assert event.type == EventTypes.BLOCK_VALID diff --git a/tests/pipelines/test_stale_monitor.py b/tests/pipelines/test_stale_monitor.py index 06ee5b5f..6e2b12b8 100644 --- a/tests/pipelines/test_stale_monitor.py +++ b/tests/pipelines/test_stale_monitor.py @@ -36,7 +36,11 @@ def test_reassign_transactions(b, user_pk): stm = stale.StaleTransactionMonitor(timeout=0.001, backlog_reassign_delay=0.001) - stm.reassign_transactions(tx.to_dict()) + # This worked previously because transaction['assignee'] was only used if + # bigchain.nodes_except_me was not empty. + tx_dict = tx.to_dict() + tx_dict['assignee'] = b.me + stm.reassign_transactions(tx_dict) # test with federation tx = Transaction.create([b.me], [([user_pk], 1)]) @@ -58,7 +62,7 @@ def test_reassign_transactions(b, user_pk): tx = tx.sign([b.me_private]) stm.bigchain.nodes_except_me = ['lol'] b.write_transaction(tx) - stm.bigchain.nodes_except_me = None + stm.bigchain.nodes_except_me = [] tx = list(query.get_stale_transactions(b.connection, 0))[0] stm.reassign_transactions(tx) diff --git a/tests/pipelines/test_steps.py b/tests/pipelines/test_steps.py index c63a673a..834162fc 100644 --- a/tests/pipelines/test_steps.py +++ b/tests/pipelines/test_steps.py @@ -20,9 +20,26 @@ def test_stepping_changefeed_produces_update(b, steps): [tx.id, tx.id]) +@pytest.mark.bdb +@pytest.mark.genesis +def test_dupe_tx_in_block(b, steps): + tx = input_single_create(b) + for i in range(2): + steps.stale_check_transactions() + steps.stale_reassign_transactions() + steps.block_changefeed() + steps.block_filter_tx() + steps.block_validate_tx() + steps.block_validate_tx() + assert steps.counts == {'block_create': 2} + steps.block_create(timeout=False) + block = steps.block_create(timeout=True) + assert block.transactions == [tx] + + def input_single_create(b): from bigchaindb.common.transaction import Transaction metadata = {'r': random.random()} - tx = Transaction.create([b.me], [([b.me], 1)], metadata) + tx = Transaction.create([b.me], [([b.me], 1)], metadata).sign([b.me_private]) b.write_transaction(tx) return tx diff --git a/tests/pipelines/test_vote.py b/tests/pipelines/test_vote.py index 20beac1e..7df7ca11 100644 --- a/tests/pipelines/test_vote.py +++ b/tests/pipelines/test_vote.py @@ -111,6 +111,18 @@ def test_validate_block_with_invalid_id(b): assert invalid_dummy_tx == [vote_obj.invalid_dummy_tx] +@pytest.mark.genesis +def test_validate_block_with_duplicated_transactions(b): + from bigchaindb.pipelines import vote + + tx = dummy_tx(b) + block = b.create_block([tx, tx]).to_dict() + + vote_obj = vote.Vote() + block_id, invalid_dummy_tx = vote_obj.validate_block(block) + assert invalid_dummy_tx == [vote_obj.invalid_dummy_tx] + + @pytest.mark.genesis def test_validate_block_with_invalid_signature(b): from bigchaindb.pipelines import vote @@ -128,17 +140,23 @@ def test_validate_block_with_invalid_signature(b): 
@pytest.mark.genesis
 def test_vote_validate_transaction(b):
     from bigchaindb.pipelines import vote
-    from bigchaindb.models import Transaction
+    from bigchaindb.common.exceptions import ValidationError

     tx = dummy_tx(b)
     vote_obj = vote.Vote()
     validation = vote_obj.validate_tx(tx, 123, 1)
     assert validation == (True, 123, 1)

-    # NOTE: Submit unsigned transaction to `validate_tx` yields `False`.
-    tx = Transaction.create([b.me], [([b.me], 1)])
-    validation = vote_obj.validate_tx(tx, 456, 10)
-    assert validation == (False, 456, 10)
+    with patch('bigchaindb.models.Transaction.validate') as validate:
+        # Assert that ValidationError gets caught
+        validate.side_effect = ValidationError()
+        validation = vote_obj.validate_tx(tx, 456, 10)
+        assert validation == (False, 456, 10)
+
+        # Assert that another error doesn't get caught
+        validate.side_effect = IOError()
+        with pytest.raises(IOError):
+            validation = vote_obj.validate_tx(tx, 456, 10)


 @pytest.mark.genesis
diff --git a/tests/test_config_utils.py b/tests/test_config_utils.py
index 602e9b57..bb445d83 100644
--- a/tests/test_config_utils.py
+++ b/tests/test_config_utils.py
@@ -1,4 +1,5 @@
 import copy
+import logging
 from unittest.mock import mock_open, patch

 import pytest
@@ -11,7 +12,6 @@ ORIGINAL_CONFIG = copy.deepcopy(bigchaindb._config)

 @pytest.fixture(scope='function', autouse=True)
 def clean_config(monkeypatch, request):
-    import bigchaindb
     original_config = copy.deepcopy(ORIGINAL_CONFIG)
     backend = request.config.getoption('--database-backend')
@@ -145,23 +145,33 @@ def test_autoconfigure_read_both_from_file_and_env(monkeypatch, request):
     DATABASE_PORT = 4242
     DATABASE_BACKEND = request.config.getoption('--database-backend')
     SERVER_BIND = '1.2.3.4:56'
+    WSSERVER_HOST = '1.2.3.4'
+    WSSERVER_PORT = 57
     KEYRING = 'pubkey_0:pubkey_1:pubkey_2'
+    LOG_FILE = '/somewhere/something.log'

     file_config = {
         'database': {
             'host': DATABASE_HOST
         },
-        'backlog_reassign_delay': 5
+        'backlog_reassign_delay': 5,
+        'log': {
+            'level_console': 'debug',
+        },
     }
     monkeypatch.setattr('bigchaindb.config_utils.file_config',
                         lambda *args, **kwargs: file_config)
     monkeypatch.setattr('os.environ', {'BIGCHAINDB_DATABASE_NAME': DATABASE_NAME,
                                        'BIGCHAINDB_DATABASE_PORT': str(DATABASE_PORT),
                                        'BIGCHAINDB_DATABASE_BACKEND': DATABASE_BACKEND,
                                        'BIGCHAINDB_SERVER_BIND': SERVER_BIND,
-                                       'BIGCHAINDB_KEYRING': KEYRING})
+                                       'BIGCHAINDB_WSSERVER_HOST': WSSERVER_HOST,
+                                       'BIGCHAINDB_WSSERVER_PORT': WSSERVER_PORT,
+                                       'BIGCHAINDB_KEYRING': KEYRING,
+                                       'BIGCHAINDB_LOG_FILE': LOG_FILE})

     import bigchaindb
     from bigchaindb import config_utils
+    from bigchaindb.log.configs import SUBSCRIBER_LOGGING_CONFIG as log_config
     config_utils.autoconfigure()

     database_rethinkdb = {
@@ -169,13 +179,21 @@
         'host': DATABASE_HOST,
         'port': DATABASE_PORT,
         'name': DATABASE_NAME,
+        'connection_timeout': 5000,
+        'max_tries': 3
     }
+
     database_mongodb = {
         'backend': 'mongodb',
         'host': DATABASE_HOST,
         'port': DATABASE_PORT,
         'name': DATABASE_NAME,
+        'connection_timeout': 5000,
+        'max_tries': 3,
         'replicaset': 'bigchain-rs',
+        'ssl': False,
+        'login': None,
+        'password': None
     }

     database = {}
@@ -188,16 +206,34 @@
         'CONFIGURED': True,
         'server': {
             'bind': SERVER_BIND,
+            'loglevel': logging.getLevelName(
+                log_config['handlers']['console']['level']).lower(),
             'workers': None,
             'threads': None,
         },
+        'wsserver': {
+            'host': WSSERVER_HOST,
+            'port': WSSERVER_PORT,
+        },
         'database': database,
         'keypair': {
             'public':
None, 'private': None, }, 'keyring': KEYRING.split(':'), - 'backlog_reassign_delay': 5 + 'backlog_reassign_delay': 5, + 'log': { + 'file': LOG_FILE, + 'error_file': log_config['handlers']['errors']['filename'], + 'level_console': 'debug', + 'level_logfile': logging.getLevelName( + log_config['handlers']['file']['level']).lower(), + 'datefmt_console': log_config['formatters']['console']['datefmt'], + 'datefmt_logfile': log_config['formatters']['file']['datefmt'], + 'fmt_console': log_config['formatters']['console']['format'], + 'fmt_logfile': log_config['formatters']['file']['format'], + 'granular_levels': {}, + }, } diff --git a/tests/test_consensus.py b/tests/test_consensus.py index 7310f514..e69de29b 100644 --- a/tests/test_consensus.py +++ b/tests/test_consensus.py @@ -1,40 +0,0 @@ - -def test_verify_vote_passes(b, structurally_valid_vote): - from bigchaindb.consensus import BaseConsensusRules - from bigchaindb.common import crypto - from bigchaindb.common.utils import serialize - vote_body = structurally_valid_vote['vote'] - vote_data = serialize(vote_body) - signature = crypto.PrivateKey(b.me_private).sign(vote_data.encode()) - vote_signed = { - 'node_pubkey': b.me, - 'signature': signature.decode(), - 'vote': vote_body - } - assert BaseConsensusRules.verify_vote([b.me], vote_signed) - - -def test_verify_vote_fails_signature(b, structurally_valid_vote): - from bigchaindb.consensus import BaseConsensusRules - vote_body = structurally_valid_vote['vote'] - vote_signed = { - 'node_pubkey': b.me, - 'signature': 'a' * 86, - 'vote': vote_body - } - assert not BaseConsensusRules.verify_vote([b.me], vote_signed) - - -def test_verify_vote_fails_schema(b): - from bigchaindb.consensus import BaseConsensusRules - from bigchaindb.common import crypto - from bigchaindb.common.utils import serialize - vote_body = {} - vote_data = serialize(vote_body) - signature = crypto.PrivateKey(b.me_private).sign(vote_data.encode()) - vote_signed = { - 'node_pubkey': b.me, - 'signature': signature.decode(), - 'vote': vote_body - } - assert not BaseConsensusRules.verify_vote([b.me], vote_signed) diff --git a/tests/test_core.py b/tests/test_core.py index 977db065..b8803e9b 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -10,6 +10,8 @@ def config(request, monkeypatch): 'port': 28015, 'name': 'bigchain', 'replicaset': 'bigchain-rs', + 'connection_timeout': 5000, + 'max_tries': 3 }, 'keypair': { 'public': 'pubkey', @@ -80,11 +82,44 @@ def test_get_blocks_status_containing_tx(monkeypatch): bigchain.get_blocks_status_containing_tx('txid') -def test_has_previous_vote(monkeypatch): - from bigchaindb.core import Bigchain - monkeypatch.setattr( - 'bigchaindb.utils.verify_vote_signature', lambda voters, vote: False) - bigchain = Bigchain(public_key='pubkey', private_key='privkey') - block = {'votes': ({'node_pubkey': 'pubkey'},)} - with pytest.raises(Exception): - bigchain.has_previous_vote(block) +@pytest.mark.genesis +def test_get_spent_issue_1271(b, alice, bob, carol): + from bigchaindb.models import Transaction + + tx_1 = Transaction.create( + [carol.public_key], + [([carol.public_key], 8)], + ).sign([carol.private_key]) + + tx_2 = Transaction.transfer( + tx_1.to_inputs(), + [([bob.public_key], 2), + ([alice.public_key], 2), + ([carol.public_key], 4)], + asset_id=tx_1.id, + ).sign([carol.private_key]) + + tx_3 = Transaction.transfer( + tx_2.to_inputs()[2:3], + [([alice.public_key], 1), + ([carol.public_key], 3)], + asset_id=tx_1.id, + ).sign([carol.private_key]) + + tx_4 = Transaction.transfer( + 
tx_2.to_inputs()[1:2] + tx_3.to_inputs()[0:1],
+        [([bob.public_key], 3)],
+        asset_id=tx_1.id,
+    ).sign([alice.private_key])
+
+    tx_5 = Transaction.transfer(
+        tx_2.to_inputs()[0:1],
+        [([alice.public_key], 2)],
+        asset_id=tx_1.id,
+    ).sign([bob.private_key])
+    block_5 = b.create_block([tx_1, tx_2, tx_3, tx_4, tx_5])
+    b.write_block(block_5)
+    assert b.get_spent(tx_2.id, 0) == tx_5
+    assert not b.get_spent(tx_5.id, 0)
+    assert b.get_outputs_filtered(alice.public_key)
+    assert b.get_outputs_filtered(alice.public_key, include_spent=False)
diff --git a/tests/test_events.py b/tests/test_events.py
new file mode 100644
index 00000000..22369b51
--- /dev/null
+++ b/tests/test_events.py
@@ -0,0 +1,21 @@
+def tests_event_handler():
+    from bigchaindb.events import (EventTypes, Event, EventHandler,
+                                   setup_events_queue)
+
+    # create an event
+    event_data = {'msg': 'some data'}
+    event = Event(EventTypes.BLOCK_VALID, event_data)
+    # create the events queue
+    events_queue = setup_events_queue()
+
+    # create event handler
+    event_handler = EventHandler(events_queue)
+
+    # push an event to the queue
+    event_handler.put_event(event)
+
+    # get the event from the queue
+    event_from_queue = event_handler.get_event()
+
+    assert event_from_queue.type == event.type
+    assert event_from_queue.data == event.data
diff --git a/tests/test_models.py b/tests/test_models.py
index 8de3a6c2..6e559cb2 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -1,22 +1,6 @@
 from pytest import raises

-class TestTransactionModel(object):
-    def test_validating_an_invalid_transaction(self, b):
-        from bigchaindb.models import Transaction
-
-        tx = Transaction.create([b.me], [([b.me], 1)])
-        tx.operation = 'something invalid'
-
-        with raises(TypeError):
-            tx.validate(b)
-
-        tx.operation = 'CREATE'
-        tx.inputs = []
-        with raises(ValueError):
-            tx.validate(b)
-
-
 class TestBlockModel(object):
     def test_block_initialization(self, monkeypatch):
         from bigchaindb.models import Block
@@ -61,11 +45,10 @@
         assert block.to_dict() == expected

     def test_block_invalid_serializaton(self):
-        from bigchaindb.common.exceptions import OperationError
         from bigchaindb.models import Block
         block = Block([])
-        with raises(OperationError):
+        with raises(ValueError):
             block.to_dict()

     def test_block_deserialization(self, b):
@@ -115,13 +98,12 @@
         transactions = [Transaction.create([b.me], [([b.me], 1)])]
         timestamp = gen_timestamp()
-        voters = ['Qaaa', 'Qbbb']

         block = {
             'timestamp': timestamp,
             'transactions': [tx.to_dict() for tx in transactions],
             'node_pubkey': b.me,
-            'voters': voters,
+            'voters': list(b.federation),
         }

         block_body = {
@@ -163,3 +145,11 @@
         public_key = PublicKey(b.me)

         assert public_key.verify(expected_block_serialized, block.signature)
+
+    def test_block_dupe_tx(self, b):
+        from bigchaindb.models import Transaction
+        from bigchaindb.common.exceptions import DuplicateTransaction
+        tx = Transaction.create([b.me], [([b.me], 1)])
+        block = b.create_block([tx, tx])
+        with raises(DuplicateTransaction):
+            block._validate_block(b)
diff --git a/tests/test_processes.py b/tests/test_processes.py
index bd69d52c..e6503541 100644
--- a/tests/test_processes.py
+++ b/tests/test_processes.py
@@ -9,14 +9,16 @@ from bigchaindb.pipelines import vote, block, election, stale
 @patch.object(block, 'start')
 @patch.object(vote, 'start')
 @patch.object(Process, 'start')
-def test_processes_start(mock_vote, mock_block, mock_election, mock_stale,
-                         mock_process):
+@patch('bigchaindb.events.setup_events_queue', spec_set=True, autospec=True) +def test_processes_start(mock_setup_events_queue, mock_process, mock_vote, + mock_block, mock_election, mock_stale): from bigchaindb import processes processes.start() mock_vote.assert_called_with() mock_block.assert_called_with() - mock_election.assert_called_with() mock_stale.assert_called_with() mock_process.assert_called_with() + mock_election.assert_called_once_with( + events_queue=mock_setup_events_queue.return_value) diff --git a/tests/test_voting.py b/tests/test_voting.py new file mode 100644 index 00000000..06d56de1 --- /dev/null +++ b/tests/test_voting.py @@ -0,0 +1,248 @@ +import pytest +from unittest.mock import patch +from collections import Counter + +from bigchaindb.core import Bigchain +from bigchaindb.exceptions import CriticalDuplicateVote +from bigchaindb.voting import Voting, INVALID, VALID, UNDECIDED + + +################################################################################ +# Tests for checking vote eligibility + + +def test_partition_eligible_votes(): + class TestVoting(Voting): + @classmethod + def verify_vote_signature(cls, vote): + if vote['node_pubkey'] == 'invalid sig': + return False + if vote['node_pubkey'] == 'value error': + raise ValueError() + return True + + voters = ['valid', 'invalid sig', 'value error', 'not in set'] + votes = [{'node_pubkey': k} for k in voters] + + el, inel = TestVoting.partition_eligible_votes(votes, voters[:-1]) + assert el == [votes[0]] + assert inel == votes[1:] + + +################################################################################ +# Test vote counting + + +def test_count_votes(): + class TestVoting(Voting): + @classmethod + def verify_vote_schema(cls, vote): + return vote['node_pubkey'] != 'malformed' + + voters = (['says invalid', 'malformed'] + + ['kosher' + str(i) for i in range(10)]) + + votes = [Bigchain(v).vote('block', 'a', True) for v in voters] + votes[0]['vote']['is_block_valid'] = False + # Incorrect previous block subtracts from n_valid and adds to n_invalid + votes[-1]['vote']['previous_block'] = 'z' + + by_voter = dict(enumerate(votes)) + + assert TestVoting.count_votes(by_voter) == { + 'counts': { + 'n_valid': 9, # 9 kosher votes + 'n_invalid': 3, # 1 invalid, 1 malformed, 1 rogue prev block + }, + 'malformed': [votes[1]], + 'previous_block': 'a', + 'other_previous_block': {'z': 1}, + } + + +def test_must_agree_prev_block(): + class TestVoting(Voting): + @classmethod + def verify_vote_schema(cls, vote): + return True + + voters = 'abcd' + votes = [Bigchain(v).vote('block', 'a', True) for v in voters] + votes[0]['vote']['previous_block'] = 'b' + votes[1]['vote']['previous_block'] = 'c' + by_voter = dict(enumerate(votes)) + assert TestVoting.count_votes(by_voter) == { + 'counts': { + 'n_valid': 2, + 'n_invalid': 2, + }, + 'previous_block': 'a', + 'other_previous_block': {'b': 1, 'c': 1}, + 'malformed': [], + } + + +################################################################################ +# Tests for vote decision making + + +DECISION_TESTS = [ + {'n_voters': 1, 'n_valid': 1, 'n_invalid': 1}, + {'n_voters': 2, 'n_valid': 2, 'n_invalid': 1}, + {'n_voters': 3, 'n_valid': 2, 'n_invalid': 2}, + {'n_voters': 4, 'n_valid': 3, 'n_invalid': 2}, + {'n_voters': 5, 'n_valid': 3, 'n_invalid': 3}, + {'n_voters': 6, 'n_valid': 4, 'n_invalid': 3}, + {'n_voters': 7, 'n_valid': 4, 'n_invalid': 4}, + {'n_voters': 8, 'n_valid': 5, 'n_invalid': 4} +] + + +@pytest.mark.parametrize('kwargs', DECISION_TESTS) +def 
test_decide_votes_valid(kwargs):
+    kwargs = kwargs.copy()
+    kwargs['n_invalid'] = 0
+    assert Voting.decide_votes(**kwargs) == VALID
+    kwargs['n_valid'] -= 1
+    assert Voting.decide_votes(**kwargs) == UNDECIDED
+
+
+@pytest.mark.parametrize('kwargs', DECISION_TESTS)
+def test_decide_votes_invalid(kwargs):
+    kwargs = kwargs.copy()
+    kwargs['n_valid'] = 0
+    assert Voting.decide_votes(**kwargs) == INVALID
+    kwargs['n_invalid'] -= 1
+    assert Voting.decide_votes(**kwargs) == UNDECIDED
+
+
+################################################################################
+# Actions - test state transitions
+
+
+@pytest.mark.parametrize('n_voters', range(8))
+def test_vote_actions(n_voters):
+    """
+    * Legal transitions are UNDECIDED -> [VALID|INVALID] only
+    * Block is never left UNDECIDED after voting
+    * Accommodates rogues on previous block / invalid schema
+    """
+    class TestVoting(Voting):
+        @classmethod
+        def verify_vote_schema(cls, vote):
+            return type(vote['vote']['is_block_valid']) == bool
+
+        @classmethod
+        def verify_vote_signature(cls, vote):
+            return True
+
+    keyring = 'abcdefghijklmnopqrstuvwxyz'[:n_voters]
+    block = {'id': 'block', 'block': {'voters': keyring}}
+    state = UNDECIDED
+    todo = [(state, [], [])]
+
+    def branch(p, r):
+        todo.append((state, votes, votes + [{
+            'node_pubkey': keyring[len(votes)],
+            'vote': {'previous_block': p, 'is_block_valid': r}
+        }]))
+
+    while todo:
+        prev_state, prev_votes, votes = todo.pop(0)
+        results = Counter(v['vote']['is_block_valid'] for v in votes)
+        prev_blocks = Counter(v['vote']['previous_block'] for v in votes)
+        majority = n_voters // 2 + 1
+        honest = (len(votes) == majority and len(prev_blocks) == 1 and
+                  not results['lol'] and len(results) == 1)
+        closed = len(votes) == n_voters
+
+        # Test legal transition
+        if votes:
+            state = TestVoting.block_election(block, votes, keyring)['status']
+            assert prev_state in [state, UNDECIDED]
+
+        # Test that decision has been reached
+        if honest or closed:
+            assert state != UNDECIDED or n_voters == 0
+
+        if closed:
+            continue
+
+        # Can accommodate more votes, add them to the todo list.
+        # This vote is the good case
+        branch('A', True)
+        # This vote disagrees on previous block
+        branch('B', True)
+        # This vote says the block is invalid
+        branch('A', False)
+        # This vote is invalid
+        branch('A', 'lol')
+
+
+################################################################################
+# Tests for vote signature
+
+
+def test_verify_vote_signature_passes(b):
+    vote = b.vote('block', 'a', True)
+    assert Voting.verify_vote_signature(vote)
+    vote['signature'] = ''
+    assert not Voting.verify_vote_signature(vote)
+
+
+################################################################################
+# Tests for vote schema
+
+
+def test_verify_vote_schema(b):
+    vote = b.vote('b' * 64, 'a' * 64, True)
+    assert Voting.verify_vote_schema(vote)
+    vote = b.vote('b' * 64, 'a', True)
+    assert not Voting.verify_vote_schema(vote)
+    vote = b.vote('b', 'a' * 64, True)
+    assert not Voting.verify_vote_schema(vote)
+
+
+################################################################################
+# block_election tests
+
+
+def test_block_election(b):
+
+    class TestVoting(Voting):
+        @classmethod
+        def verify_vote_signature(cls, vote):
+            return True
+
+        @classmethod
+        def verify_vote_schema(cls, vote):
+            return True
+
+    keyring = 'abc'
+    block = {'id': 'xyz', 'block': {'voters': 'ab'}}
+    votes = [{
+        'node_pubkey': c,
+        'vote': {'is_block_valid': True, 'previous_block': 'a'}
+    } for c in 'abc']
+
+    assert TestVoting.block_election(block, votes, keyring) == {
+        'status': VALID,
+        'block_id': 'xyz',
+        'counts': {'n_valid': 2, 'n_invalid': 0},
+        'ineligible': [votes[-1]],
+        'malformed': [],
+        'previous_block': 'a',
+        'other_previous_block': {},
+    }
+
+
+@patch('bigchaindb.voting.Voting.verify_vote_signature', return_value=True)
+def test_duplicate_vote_throws_critical_error(mock_verify_vote_signature, b):
+    keyring = 'abc'
+    block = {'id': 'xyz', 'block': {'voters': 'ab'}}
+    votes = [{
+        'node_pubkey': c,
+        'vote': {'is_block_valid': True, 'previous_block': 'a'}
+    } for c in 'aabc']
+    with pytest.raises(CriticalDuplicateVote):
+        Voting.block_election(block, votes, keyring)
diff --git a/tests/web/test_info.py b/tests/web/test_info.py
index c55f467f..292b1b74 100644
--- a/tests/web/test_info.py
+++ b/tests/web/test_info.py
@@ -23,7 +23,7 @@ def test_api_root_endpoint(client):
 def test_api_v1_endpoint(client):
     res = client.get('/api/v1')
     docs_url = ['https://docs.bigchaindb.com/projects/server/en/vtsttst',
-                '/drivers-clients/http-client-server-api.html',
+                '/http-client-server-api.html',
                 ]
     assert res.json == {
         '_links': {
@@ -31,5 +31,6 @@
             'self': 'http://localhost/api/v1/',
             'statuses': 'http://localhost/api/v1/statuses/',
             'transactions': 'http://localhost/api/v1/transactions/',
+            'streams_v1': 'ws://localhost:9985/api/v1/streams/valid_tx',
         }
     }
diff --git a/tests/web/test_outputs.py b/tests/web/test_outputs.py
index fd17d46d..b5a02f76 100644
--- a/tests/web/test_outputs.py
+++ b/tests/web/test_outputs.py
@@ -47,3 +47,68 @@ def test_get_outputs_endpoint_with_invalid_unspent(client, user_pk):
     res = client.get(OUTPUTS_ENDPOINT + params)
     assert expected == res.json
     assert res.status_code == 400
+
+
+@pytest.mark.bdb
+@pytest.mark.usefixtures('inputs')
+def test_get_divisible_transactions_returns_500(b, client):
+    from bigchaindb.models import Transaction
+    from bigchaindb.common import crypto
+    import json
+
+    TX_ENDPOINT = '/api/v1/transactions'
+
+    def mine(tx_list):
+        block = b.create_block(tx_list)
+        b.write_block(block)
+
+        # vote the block valid
+        vote = b.vote(block.id,
b.get_last_voted_block().id, True) + b.write_vote(vote) + + alice_priv, alice_pub = crypto.generate_key_pair() + bob_priv, bob_pub = crypto.generate_key_pair() + carly_priv, carly_pub = crypto.generate_key_pair() + + create_tx = Transaction.create([alice_pub], [([alice_pub], 4)]) + create_tx.sign([alice_priv]) + + res = client.post(TX_ENDPOINT, data=json.dumps(create_tx.to_dict())) + assert res.status_code == 202 + + mine([create_tx]) + + transfer_tx = Transaction.transfer(create_tx.to_inputs(), + [([alice_pub], 3), ([bob_pub], 1)], + asset_id=create_tx.id) + transfer_tx.sign([alice_priv]) + + res = client.post(TX_ENDPOINT, data=json.dumps(transfer_tx.to_dict())) + assert res.status_code == 202 + + mine([transfer_tx]) + + transfer_tx_carly = Transaction.transfer([transfer_tx.to_inputs()[1]], + [([carly_pub], 1)], + asset_id=create_tx.id) + transfer_tx_carly.sign([bob_priv]) + + res = client.post(TX_ENDPOINT, data=json.dumps(transfer_tx_carly.to_dict())) + assert res.status_code == 202 + + mine([transfer_tx_carly]) + + asset_id = create_tx.id + + url = TX_ENDPOINT + '?asset_id=' + asset_id + assert client.get(url).status_code == 200 + assert len(client.get(url).json) == 3 + + url = OUTPUTS_ENDPOINT + '?public_key=' + alice_pub + assert client.get(url).status_code == 200 + + url = OUTPUTS_ENDPOINT + '?public_key=' + bob_pub + assert client.get(url).status_code == 200 + + url = OUTPUTS_ENDPOINT + '?public_key=' + carly_pub + assert client.get(url).status_code == 200 diff --git a/tests/web/test_statuses.py b/tests/web/test_statuses.py index af9d09d3..716cc0d2 100644 --- a/tests/web/test_statuses.py +++ b/tests/web/test_statuses.py @@ -30,7 +30,7 @@ def test_get_block_status_endpoint_undecided(b, client): block = b.create_block([tx]) b.write_block(block) - status = b.block_election_status(block.id, block.voters) + status = b.block_election_status(block) res = client.get(STATUSES_ENDPOINT + '?block_id=' + block.id) assert status == res.json['status'] @@ -51,7 +51,7 @@ def test_get_block_status_endpoint_valid(b, client): vote = b.vote(block.id, b.get_last_voted_block().id, True) b.write_vote(vote) - status = b.block_election_status(block.id, block.voters) + status = b.block_election_status(block) res = client.get(STATUSES_ENDPOINT + '?block_id=' + block.id) assert status == res.json['status'] @@ -72,7 +72,7 @@ def test_get_block_status_endpoint_invalid(b, client): vote = b.vote(block.id, b.get_last_voted_block().id, False) b.write_vote(vote) - status = b.block_election_status(block.id, block.voters) + status = b.block_election_status(block) res = client.get(STATUSES_ENDPOINT + '?block_id=' + block.id) assert status == res.json['status'] diff --git a/tests/web/test_transactions.py b/tests/web/test_transactions.py index 71f4f0e9..4c6e76c1 100644 --- a/tests/web/test_transactions.py +++ b/tests/web/test_transactions.py @@ -1,4 +1,3 @@ -import builtins import json from unittest.mock import patch @@ -45,7 +44,8 @@ def test_post_create_transaction_endpoint(b, client): assert res.json['outputs'][0]['public_keys'][0] == user_pub -def test_post_create_transaction_with_invalid_id(b, client, caplog): +@patch('bigchaindb.web.views.base.logger') +def test_post_create_transaction_with_invalid_id(mock_logger, b, client): from bigchaindb.common.exceptions import InvalidHash from bigchaindb.models import Transaction user_priv, user_pub = crypto.generate_key_pair() @@ -57,16 +57,29 @@ def test_post_create_transaction_with_invalid_id(b, client, caplog): res = client.post(TX_ENDPOINT, data=json.dumps(tx)) 
expected_status_code = 400 expected_error_message = ( - 'Invalid transaction ({}): The transaction\'s id \'{}\' isn\'t equal to ' - 'the hash of its body, i.e. it\'s not valid.' + "Invalid transaction ({}): The transaction's id '{}' isn't equal to " + "the hash of its body, i.e. it's not valid." ).format(InvalidHash.__name__, tx['id']) assert res.status_code == expected_status_code assert res.json['message'] == expected_error_message - assert caplog.records[0].args['status'] == expected_status_code - assert caplog.records[0].args['message'] == expected_error_message + assert mock_logger.error.called + assert ( + 'HTTP API error: %(status)s - %(message)s' in + mock_logger.error.call_args[0] + ) + assert ( + {'message': expected_error_message, 'status': expected_status_code} in + mock_logger.error.call_args[0] + ) + # TODO put back caplog based asserts once possible + # assert caplog.records[0].args['status'] == expected_status_code + # assert caplog.records[0].args['message'] == expected_error_message -def test_post_create_transaction_with_invalid_signature(b, client, caplog): +@patch('bigchaindb.web.views.base.logger') +def test_post_create_transaction_with_invalid_signature(mock_logger, + b, + client): from bigchaindb.common.exceptions import InvalidSignature from bigchaindb.models import Transaction user_priv, user_pub = crypto.generate_key_pair() @@ -83,8 +96,18 @@ def test_post_create_transaction_with_invalid_signature(b, client, caplog): ).format(InvalidSignature.__name__) assert res.status_code == expected_status_code assert res.json['message'] == expected_error_message - assert caplog.records[0].args['status'] == expected_status_code - assert caplog.records[0].args['message'] == expected_error_message + assert mock_logger.error.called + assert ( + 'HTTP API error: %(status)s - %(message)s' in + mock_logger.error.call_args[0] + ) + assert ( + {'message': expected_error_message, 'status': expected_status_code} in + mock_logger.error.call_args[0] + ) + # TODO put back caplog based asserts once possible + # assert caplog.records[0].args['status'] == expected_status_code + # assert caplog.records[0].args['message'] == expected_error_message def test_post_create_transaction_with_invalid_structure(client): @@ -92,7 +115,8 @@ def test_post_create_transaction_with_invalid_structure(client): assert res.status_code == 400 -def test_post_create_transaction_with_invalid_schema(client, caplog): +@patch('bigchaindb.web.views.base.logger') +def test_post_create_transaction_with_invalid_schema(mock_logger, client): from bigchaindb.models import Transaction user_priv, user_pub = crypto.generate_key_pair() tx = Transaction.create( @@ -104,8 +128,18 @@ def test_post_create_transaction_with_invalid_schema(client, caplog): "Invalid transaction schema: 'version' is a required property") assert res.status_code == expected_status_code assert res.json['message'] == expected_error_message - assert caplog.records[0].args['status'] == expected_status_code - assert caplog.records[0].args['message'] == expected_error_message + assert mock_logger.error.called + assert ( + 'HTTP API error: %(status)s - %(message)s' in + mock_logger.error.call_args[0] + ) + assert ( + {'message': expected_error_message, 'status': expected_status_code} in + mock_logger.error.call_args[0] + ) + # TODO put back caplog based asserts once possible + # assert caplog.records[0].args['status'] == expected_status_code + # assert caplog.records[0].args['message'] == expected_error_message @pytest.mark.parametrize('exc,msg', ( @@ -113,18 
+147,16 @@
     ('DoubleSpend', 'Nope! It is gone now!'),
     ('InvalidHash', 'Do not smoke that!'),
     ('InvalidSignature', 'Falsche Unterschrift!'),
-    ('OperationError', 'Create and transfer!'),
-    ('TransactionDoesNotExist', 'Hallucinations?'),
+    ('ValidationError', 'Create and transfer!'),
+    ('InputDoesNotExist', 'Hallucinations?'),
     ('TransactionOwnerError', 'Not yours!'),
     ('TransactionNotInValidBlock', 'Wait, maybe?'),
-    ('ValueError', '?'),
+    ('ValidationError', '?'),
 ))
-def test_post_invalid_transaction(client, exc, msg, monkeypatch, caplog):
+@patch('bigchaindb.web.views.base.logger')
+def test_post_invalid_transaction(mock_logger, client, exc, msg, monkeypatch):
     from bigchaindb.common import exceptions
-    try:
-        exc_cls = getattr(exceptions, exc)
-    except AttributeError:
-        exc_cls = getattr(builtins, 'ValueError')
+    exc_cls = getattr(exceptions, exc)

     def mock_validation(self_, tx):
         raise exc_cls(msg)
@@ -139,8 +171,18 @@
     assert res.status_code == expected_status_code
     assert (res.json['message'] ==
             'Invalid transaction ({}): {}'.format(exc, msg))
-    assert caplog.records[2].args['status'] == expected_status_code
-    assert caplog.records[2].args['message'] == expected_error_message
+    assert mock_logger.error.called
+    assert (
+        'HTTP API error: %(status)s - %(message)s' in
+        mock_logger.error.call_args[0]
+    )
+    assert (
+        {'message': expected_error_message, 'status': expected_status_code} in
+        mock_logger.error.call_args[0]
+    )
+    # TODO put back caplog based asserts once possible
+    # assert caplog.records[2].args['status'] == expected_status_code
+    # assert caplog.records[2].args['message'] == expected_error_message


 @pytest.mark.bdb
diff --git a/tests/web/test_votes.py b/tests/web/test_votes.py
index bae31b9a..0bdd1081 100644
--- a/tests/web/test_votes.py
+++ b/tests/web/test_votes.py
@@ -27,6 +27,8 @@ def test_get_votes_endpoint(b, client):
 @pytest.mark.bdb
 @pytest.mark.usefixtures('inputs')
 def test_get_votes_endpoint_multiple_votes(b, client):
+    from bigchaindb.common.crypto import generate_key_pair
+
     tx = Transaction.create([b.me], [([b.me], 1)])
     tx = tx.sign([b.me_private])

@@ -37,8 +39,12 @@
     vote_valid = b.vote(block.id, last_block, True)
     b.write_vote(vote_valid)

-    # vote the block valid
+    # vote the block invalid
+    # a node can only vote once, so we need a new node_pubkey for the second
+    # vote
+    _, pk = generate_key_pair()
     vote_invalid = b.vote(block.id, last_block, False)
+    vote_invalid['node_pubkey'] = pk
     b.write_vote(vote_invalid)

     res = client.get(VOTES_ENDPOINT + '?block_id=' + block.id)
diff --git a/tests/web/test_websocket_server.py b/tests/web/test_websocket_server.py
new file mode 100644
index 00000000..f25e183f
--- /dev/null
+++ b/tests/web/test_websocket_server.py
@@ -0,0 +1,187 @@
+import asyncio
+import json
+import queue
+import random
+import threading
+from unittest.mock import patch
+
+import pytest
+
+
+@pytest.fixture
+def _block(b, request):
+    from bigchaindb.models import Transaction
+    total = getattr(request, 'param', 1)
+    transactions = [
+        Transaction.create(
+            [b.me],
+            [([b.me], 1)],
+            metadata={'msg': random.random()},
+        ).sign([b.me_private])
+        for _ in range(total)
+    ]
+    return b.create_block(transactions)
+
+
+class MockWebSocket:
+    def __init__(self):
+        self.received = []
+
+    def send_str(self, s):
+        self.received.append(s)
+
+
+@asyncio.coroutine
+def
test_bridge_sync_async_queue(loop): + from bigchaindb.web.websocket_server import _multiprocessing_to_asyncio + + sync_queue = queue.Queue() + async_queue = asyncio.Queue(loop=loop) + + bridge = threading.Thread(target=_multiprocessing_to_asyncio, + args=(sync_queue, async_queue, loop), + daemon=True) + bridge.start() + + sync_queue.put('fahren') + sync_queue.put('auf') + sync_queue.put('der') + sync_queue.put('Autobahn') + + result = yield from async_queue.get() + assert result == 'fahren' + + result = yield from async_queue.get() + assert result == 'auf' + + result = yield from async_queue.get() + assert result == 'der' + + result = yield from async_queue.get() + assert result == 'Autobahn' + + assert async_queue.qsize() == 0 + + +@patch('threading.Thread') +@patch('aiohttp.web.run_app') +@patch('bigchaindb.web.websocket_server.init_app') +@patch('asyncio.get_event_loop', return_value='event-loop') +@patch('asyncio.Queue', return_value='event-queue') +def test_start_creates_an_event_loop(queue_mock, get_event_loop_mock, + init_app_mock, run_app_mock, + thread_mock): + from bigchaindb import config + from bigchaindb.web.websocket_server import start, _multiprocessing_to_asyncio + + start(None) + thread_mock.assert_called_once_with( + target=_multiprocessing_to_asyncio, + args=(None, queue_mock.return_value, get_event_loop_mock.return_value), + daemon=True, + ) + thread_mock.return_value.start.assert_called_once_with() + init_app_mock.assert_called_with('event-queue', loop='event-loop') + run_app_mock.assert_called_once_with( + init_app_mock.return_value, + host=config['wsserver']['host'], + port=config['wsserver']['port'], + ) + + +@asyncio.coroutine +def test_websocket_string_event(test_client, loop): + from bigchaindb.web.websocket_server import init_app, POISON_PILL, EVENTS_ENDPOINT + + event_source = asyncio.Queue(loop=loop) + app = init_app(event_source, loop=loop) + client = yield from test_client(app) + ws = yield from client.ws_connect(EVENTS_ENDPOINT) + + yield from event_source.put('hack') + yield from event_source.put('the') + yield from event_source.put('planet!') + + result = yield from ws.receive() + assert result.data == 'hack' + + result = yield from ws.receive() + assert result.data == 'the' + + result = yield from ws.receive() + assert result.data == 'planet!' 
+
+    yield from event_source.put(POISON_PILL)
+
+
+@asyncio.coroutine
+@pytest.mark.parametrize('_block', (10,), indirect=('_block',), ids=('block',))
+def test_websocket_block_event(b, _block, test_client, loop):
+    from bigchaindb import events
+    from bigchaindb.web.websocket_server import init_app, POISON_PILL, EVENTS_ENDPOINT
+
+    event_source = asyncio.Queue(loop=loop)
+    app = init_app(event_source, loop=loop)
+    client = yield from test_client(app)
+    ws = yield from client.ws_connect(EVENTS_ENDPOINT)
+    block = _block.to_dict()
+    block_event = events.Event(events.EventTypes.BLOCK_VALID, block)
+
+    yield from event_source.put(block_event)
+
+    for tx in block['block']['transactions']:
+        result = yield from ws.receive()
+        json_result = json.loads(result.data)
+        assert json_result['tx_id'] == tx['id']
+        # Since the transactions are all CREATEs, asset id == transaction id
+        assert json_result['asset_id'] == tx['id']
+        assert json_result['block_id'] == block['id']
+
+    yield from event_source.put(POISON_PILL)
+
+
+@pytest.mark.skip('Processes are not stopping properly, and the whole test suite would hang')
+@pytest.mark.genesis
+def test_integration_from_webapi_to_websocket(monkeypatch, client, loop):
+    # XXX: I think that the `pytest-aiohttp` plugin is sprinkling too much
+    # magic in the `asyncio` module: running this test without monkey-patching
+    # `asyncio.get_event_loop` (and without the `loop` fixture) raises a:
+    #     RuntimeError: There is no current event loop in thread 'MainThread'.
+    #
+    # That's pretty weird because this test doesn't use the pytest-aiohttp
+    # plugin explicitly.
+    monkeypatch.setattr('asyncio.get_event_loop', lambda: loop)
+
+    import json
+    import random
+    import aiohttp
+
+    from bigchaindb.common import crypto
+    from bigchaindb import processes
+    from bigchaindb.models import Transaction
+
+    # Start BigchainDB
+    processes.start()
+
+    loop = asyncio.get_event_loop()
+
+    import time
+    time.sleep(1)
+
+    ws_url = client.get('http://localhost:9984/api/v1/').json['_links']['streams_v1']
+
+    # Connect to the WebSocket endpoint
+    session = aiohttp.ClientSession()
+    ws = loop.run_until_complete(session.ws_connect(ws_url))
+
+    # Create a keypair and generate a new asset
+    user_priv, user_pub = crypto.generate_key_pair()
+    asset = {'random': random.random()}
+    tx = Transaction.create([user_pub], [([user_pub], 1)], asset=asset)
+    tx = tx.sign([user_priv])
+    # Post the transaction to the BigchainDB Web API
+    client.post('/api/v1/transactions/', data=json.dumps(tx.to_dict()))
+
+    result = loop.run_until_complete(ws.receive())
+    json_result = json.loads(result.data)
+    assert json_result['tx_id'] == tx.id
diff --git a/tox.ini b/tox.ini
index d2cd2a2c..bdaea034 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,9 +1,9 @@
 [tox]
 skipsdist = true
-envlist = py{34,35}-{rethinkdb,mongodb}, flake8, docsroot, docsserver
+envlist = py{34,35,36}-{rethinkdb,mongodb}, flake8, docsroot, docsserver

 [base]
-basepython = python3.5
+basepython = python3.6
 deps = pip>=9.0.1

 [testenv]
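
For reference, the quorum rule that the DECISION_TESTS table in tests/test_voting.py pins down at each boundary can be summarized in a few lines. This is an illustrative sketch, not the actual implementation in bigchaindb/voting.py: the name decide_votes_sketch is hypothetical, and only the VALID/INVALID/UNDECIDED constants are assumed from bigchaindb.voting.

    from bigchaindb.voting import INVALID, UNDECIDED, VALID

    def decide_votes_sketch(n_voters=0, n_valid=0, n_invalid=0):
        # A strict majority of valid votes decides VALID; as soon as enough
        # invalid votes arrive that such a majority is unreachable, INVALID.
        majority = n_voters // 2 + 1
        if n_invalid > n_voters - majority:
            return INVALID
        if n_valid >= majority:
            return VALID
        return UNDECIDED

    # e.g. with 4 voters the majority is 3, so 2 invalid votes already decide:
    assert decide_votes_sketch(n_voters=4, n_valid=3) == VALID
    assert decide_votes_sketch(n_voters=4, n_invalid=2) == INVALID
    assert decide_votes_sketch(n_voters=4, n_valid=2) == UNDECIDED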