Merge branch 'master' into feature/improve-docker-configuration

vrde 2016-04-28 15:48:19 +02:00
commit c9773b3f2c
No known key found for this signature in database
GPG Key ID: 6581C7C39B3D397D
10 changed files with 229 additions and 57 deletions

.gitignore vendored

@@ -71,3 +71,4 @@ target/
# Some files created when deploying a cluster on AWS
deploy-cluster-aws/conf/rethinkdb.conf
deploy-cluster-aws/hostlist.py
deploy-cluster-aws/confiles/


@@ -14,10 +14,10 @@ For reference, the possible headings are:
* **External Contributors** to list contributors outside of ascribe GmbH.
## [Unreleased] - YYYY-MM-DD
Tag name: TBD
= commit: TBD
committed: TBD
## [0.2.0] - 2016-04-26
Tag name: v0.2.0
= commit: 0c4a2b380aabdcf50fa2d7fb351c290aaedc3db7
committed: April 26, 2016, 11:09 AM GMT+2
### Added
- Ability to use environment variables to set (or partially set) configuration settings: [Pull Request #153](https://github.com/bigchaindb/bigchaindb/pull/153)
@@ -50,6 +50,7 @@ committed: TBD
### Fixed
- Bug related to config overwrite: [Pull Request #97](https://github.com/bigchaindb/bigchaindb/pull/97)
- Bug related to running the `bigchaindb-benchmark load` on Docker: [Pull Request #225](https://github.com/bigchaindb/bigchaindb/pull/225)
## External Contributors
- [@thedoctor](https://github.com/thedoctor): Pull Requests


@@ -1,2 +1,2 @@
__version__ = '0.1.5'
__short_version__ = '0.1'
__version__ = '0.2.0'
__short_version__ = '0.2'


@@ -0,0 +1,72 @@
# -*- coding: utf-8 -*-
"""Given a directory full of default BigchainDB config files,
transform them into config files for a cluster with proper
keyrings, API endpoint values, etc.
Note: This script assumes that there is a file named hostlist.py
containing public_dns_names = a list of the public DNS names of
all the hosts in the cluster.
Usage:
    python clusterize_confiles.py <dir> <number_of_files>
"""
from __future__ import unicode_literals
import os
import json
import argparse
from hostlist import public_dns_names
# Parse the command-line arguments
parser = argparse.ArgumentParser()
parser.add_argument('dir',
                    help='Directory containing the config files')
parser.add_argument('number_of_files',
                    help='Number of config files expected in dir',
                    type=int)
args = parser.parse_args()

conf_dir = args.dir
numfiles_expected = int(args.number_of_files)

# Check if the number of files in conf_dir is what was expected
conf_files = os.listdir(conf_dir)
numfiles = len(conf_files)
if numfiles != numfiles_expected:
    raise ValueError('There are {} files in {} but {} were expected'.
                     format(numfiles, conf_dir, numfiles_expected))

# Make a list containing all the public keys from
# all the config files
pubkeys = []
for filename in conf_files:
    file_path = os.path.join(conf_dir, filename)
    with open(file_path, 'r') as f:
        conf_dict = json.load(f)
        pubkey = conf_dict['keypair']['public']
        pubkeys.append(pubkey)

# Rewrite each config file, one at a time
for i, filename in enumerate(conf_files):
    file_path = os.path.join(conf_dir, filename)
    with open(file_path, 'r') as f:
        conf_dict = json.load(f)
        # The keyring is the list of *all* public keys
        # minus the config file's own public key
        keyring = list(pubkeys)
        keyring.remove(conf_dict['keypair']['public'])
        conf_dict['keyring'] = keyring
        # Allow incoming server traffic from any IP address
        # to port 9984
        conf_dict['server']['bind'] = '0.0.0.0:9984'
        # Set the api_endpoint
        conf_dict['api_endpoint'] = 'http://' + public_dns_names[i] + \
                                    ':9984/api/v1'
    # Delete the config file
    os.remove(file_path)
    # Write new config file with the same filename
    print('Rewriting {}'.format(file_path))
    with open(file_path, 'w') as f2:
        json.dump(conf_dict, f2)
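As a side note, the per-file transform above can be sketched in isolation. The keys and DNS names below are hypothetical placeholders, purely for illustration:

```python
import json

# Hypothetical inputs: public keys collected from three default
# config files, and the cluster's public DNS names
pubkeys = ['keyA', 'keyB', 'keyC']
public_dns_names = ['node0.example.com', 'node1.example.com', 'node2.example.com']

# Transform the config for node i = 1, mirroring clusterize_confiles.py
i = 1
conf_dict = {'keypair': {'public': pubkeys[i]}, 'server': {}}

keyring = list(pubkeys)
keyring.remove(conf_dict['keypair']['public'])  # every key except its own
conf_dict['keyring'] = keyring
conf_dict['server']['bind'] = '0.0.0.0:9984'    # listen on all interfaces
conf_dict['api_endpoint'] = 'http://' + public_dns_names[i] + ':9984/api/v1'

print(json.dumps(conf_dict, indent=2))
```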


@@ -5,7 +5,7 @@ BigchainDB, including its storage backend (RethinkDB).
from __future__ import with_statement, unicode_literals
from fabric.api import sudo, env
from fabric.api import sudo, env, hosts
from fabric.api import task, parallel
from fabric.contrib.files import sed
from fabric.operations import run, put
@@ -32,24 +32,19 @@ newrelic_license_key = 'you_need_a_real_license_key'
# DON'T PUT @parallel
@task
def set_hosts(hosts):
    """A helper function to change env.hosts from the
    command line.
def set_host(host_index):
    """A helper task to change env.hosts from the
    command line. It will only "stick" for the duration
    of the fab command that called it.

    Args:
        hosts (str): 'one_node' or 'two_nodes'
        host_index (int): 0, 1, 2, 3, etc.

    Example:
        fab set_hosts:one_node init_bigchaindb
        fab set_host:4 fab_task_A fab_task_B
        will set env.hosts = [public_dns_names[4]]
        but only for doing fab_task_A and fab_task_B
    """
    if hosts == 'one_node':
        env.hosts = public_dns_names[:1]
    elif hosts == 'two_nodes':
        env.hosts = public_dns_names[:2]
    else:
        raise ValueError('Invalid input to set_hosts.'
                         ' Expected one_node or two_nodes.'
                         ' Got {}'.format(hosts))
    env.hosts = [public_dns_names[int(host_index)]]
# Install base software
@@ -138,13 +133,26 @@ def configure_bigchaindb():
    run('bigchaindb -y configure', pty=False)

# Send the specified configuration file to
# the remote host and save it there in
# ~/.bigchaindb
# Use in conjunction with set_host()
# No @parallel
@task
def send_confile(confile):
    put('confiles/' + confile, 'tempfile')
    run('mv tempfile ~/.bigchaindb')
    print('For this node, bigchaindb show-config says:')
    run('bigchaindb show-config')
# Initialize BigchainDB
# i.e. create the database, the tables,
# the indexes, and the genesis block.
# (This only needs to be run on one node.)
# Call using:
# fab set_hosts:one_node init_bigchaindb
# (The @hosts decorator is used to make this
# task run on only one node. See http://tinyurl.com/h9qqf3t )
@task
@hosts(public_dns_names[0])
def init_bigchaindb():
    run('bigchaindb init', pty=False)


@@ -14,6 +14,7 @@
from __future__ import unicode_literals
import sys
import time
import socket
import argparse
import botocore
import boto3
@@ -192,8 +193,27 @@ with open('hostlist.py', 'w') as f:
    f.write('\n')
    f.write('public_dns_names = {}\n'.format(public_dns_names))

# Wait
wait_time = 45
print('Waiting {} seconds to make sure all instances are ready...'.
      format(wait_time))
time.sleep(wait_time)

# For each node in the cluster, check port 22 (ssh) until it's reachable
for instance in instances_with_tag:
    ip_address = instance.public_ip_address
    # Create a socket
    # Address Family: AF_INET (means IPv4)
    # Type: SOCK_STREAM (means connection-oriented TCP protocol)
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    print('Attempting to connect to {} on port 22 (ssh)...'.
          format(ip_address))
    unreachable = True
    while unreachable:
        try:
            # Open a connection to the remote node on port 22
            s.connect((ip_address, 22))
        except socket.error as e:
            print(' Socket error: {}'.format(e))
            print(' Trying again in 3 seconds')
            time.sleep(3.0)
        else:
            print(' Port 22 is reachable!')
            s.shutdown(socket.SHUT_WR)
            s.close()
            unreachable = False
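The reachability loop above could also be factored into a reusable helper. Here is a hedged sketch (the function name and parameters are mine, not from this repo); it adds a per-attempt timeout so a single connect() cannot hang indefinitely, and it creates a fresh socket per attempt because a socket whose connect() failed should not be reused:

```python
import socket
import time

def wait_for_port(ip_address, port=22, retry_delay=3.0, conn_timeout=5.0):
    """Block until a TCP connection to ip_address:port succeeds."""
    while True:
        # A fresh socket per attempt: a failed connect() leaves
        # the old socket in an unusable state
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.settimeout(conn_timeout)  # bound each connection attempt
        try:
            s.connect((ip_address, port))
            return True
        except socket.error:
            time.sleep(retry_delay)
        finally:
            s.close()
```

With a helper like this, the loop body would reduce to one call per instance, e.g. `wait_for_port(instance.public_ip_address)`; a maximum-attempts argument could be added if an unreachable node should abort the deployment instead of retrying forever.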


@@ -0,0 +1,40 @@
#! /bin/bash

# The set -e option instructs bash to immediately exit
# if any command has a non-zero exit status
set -e

function printErr()
{
    echo "usage: ./make_confiles.sh <dir> <number_of_files>"
    echo "No argument $1 supplied"
}

if [ -z "$1" ]; then
    printErr "<dir>"
    exit 1
fi

if [ -z "$2" ]; then
    printErr "<number_of_files>"
    exit 1
fi

CONFDIR=$1
NUMFILES=$2

# If $CONFDIR exists, remove it
if [ -d "$CONFDIR" ]; then
    rm -rf $CONFDIR
fi

# Create $CONFDIR
mkdir $CONFDIR

# Use the bigchaindb configure command to create
# $NUMFILES BigchainDB config files in $CONFDIR
for (( i=0; i<$NUMFILES; i++ )); do
    CONPATH=$CONFDIR"/bcdb_conf"$i
    echo "Writing "$CONPATH
    bigchaindb -y -c $CONPATH configure
done


@@ -21,7 +21,7 @@ if [ -z "$2" ]; then
fi
TAG=$1
NODES=$2
NUM_NODES=$2
# If they don't include a third argument (<pypi_or_branch>)
# then assume BRANCH = "pypi" by default
@@ -38,6 +38,13 @@ if [ ! -f "pem/bigchaindb.pem" ]; then
    exit 1
fi
# Check for the confiles directory
if [ ! -d "confiles" ]; then
    echo "Directory confiles is needed but does not exist"
    echo "See make_confiles.sh to find out how to make it"
    exit 1
fi
# Change the file permissions on pem/bigchaindb.pem
# so that the owner can read it, but that's all
chmod 0400 pem/bigchaindb.pem
@@ -52,7 +59,7 @@ chmod 0400 pem/bigchaindb.pem
# 5. writes the shellscript add2known_hosts.sh
# 6. (over)writes a file named hostlist.py
# containing a list of all public DNS names.
python launch_ec2_nodes.py --tag $TAG --nodes $NODES
python launch_ec2_nodes.py --tag $TAG --nodes $NUM_NODES
# Make add2known_hosts.sh executable then execute it.
# This adds remote keys to ~/.ssh/known_hosts
@@ -86,22 +93,38 @@ else
fi
# Configure BigchainDB on all nodes
fab configure_bigchaindb
# TODO: Get public keys from all nodes
# The idea is to send a bunch of locally-created configuration
# files out to each of the instances / nodes.
# Assume a set of $NUM_NODES BigchainDB config files
# already exists in the confiles directory.
# One can create a set using a command like
# ./make_confiles.sh confiles $NUM_NODES
# (We can't do that here now because this virtual environment
# is a Python 2 environment that may not even have
# bigchaindb installed, so bigchaindb configure can't be called)
# TODO: Add list of public keys to keyring of all nodes
# Transform the config files in the confiles directory
# to have proper keyrings, api_endpoint values, etc.
python clusterize_confiles.py confiles $NUM_NODES
# Send one of the config files to each instance
for (( HOST=0 ; HOST<$NUM_NODES ; HOST++ )); do
    CONFILE="bcdb_conf"$HOST
    echo "Sending "$CONFILE
    fab set_host:$HOST send_confile:$CONFILE
done
# Send a "bigchaindb init" command to one node
# to initialize the BigchainDB database
# i.e. create the database, the tables,
# the indexes, and the genesis block.
fab set_hosts:one_node init_bigchaindb
# Initialize BigchainDB (i.e. create the RethinkDB database,
# the tables, the indexes, and the genesis block). Note that
# this will only be sent to one of the nodes, see the
# definition of init_bigchaindb() in fabfile.py to see why.
fab init_bigchaindb
# Start BigchainDB on all the nodes using "screen"
fab start_bigchaindb
# cleanup
rm add2known_hosts.sh
# rm -rf temp_confs


@@ -14,6 +14,7 @@ rethinkdb-data:

bigchaindb:
  build: .
  container_name: docker-bigchaindb
  volumes:
    - ./bigchaindb:/usr/src/app/bigchaindb
    - ./tests:/usr/src/app/tests


@@ -83,16 +83,35 @@ Add some rules for Inbound traffic:
**Note: These rules are extremely lax! They're meant to make testing easy.** You'll want to tighten them up if you intend to have a secure cluster. For example, Source = 0.0.0.0/0 is [CIDR notation](https://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) for "allow this traffic to come from _any_ IP address."
## Deployment
## AWS Deployment
Here's an example of how one could launch a BigchainDB cluster of four nodes tagged `wrigley` on AWS:
### AWS Deployment Step 1
Suppose _N_ is the number of nodes you want in your BigchainDB cluster. If you already have a set of _N_ BigchainDB configuration files in the `deploy-cluster-aws/confiles` directory, then you can jump to step 2. To create such a set, you can do something like:
```text
# in a Python 3 virtual environment where bigchaindb is installed
cd bigchaindb
cd deploy-cluster-aws
./startup.sh wrigley 4 pypi
./make_confiles.sh confiles 3
```
The `pypi` on the end means that it will install the latest (stable) `bigchaindb` package from the [Python Package Index (PyPI)](https://pypi.python.org/pypi). That is, on each instance, BigchainDB is installed using `pip install bigchaindb`.
That will create three (3) _default_ BigchainDB configuration files in the `deploy-cluster-aws/confiles` directory (which will be created if it doesn't already exist). The three files will be named `bcdb_conf0`, `bcdb_conf1`, and `bcdb_conf2`.
You can look inside those files if you're curious. In step 2, they'll be modified. For example, the default keyring is an empty list. In step 2, the deployment script automatically changes the keyring of each node to be a list of the public keys of all other nodes. Other changes are also made.
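For example, with three hypothetical public keys (real keys are long base58 strings; these are placeholders), the keyrings relate like this:

```python
# Hypothetical public keys for a three-node cluster
pubkeys = ['pubkey0', 'pubkey1', 'pubkey2']

# Each node's keyring: every public key except its own
keyrings = {k: [other for other in pubkeys if other != k] for k in pubkeys}

print(keyrings['pubkey1'])  # ['pubkey0', 'pubkey2']
```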
### AWS Deployment Step 2
Step 2 is to launch the nodes ("instances") on AWS, install all the necessary software on them, configure the software, run the software, and more.
Here's an example of how one could launch a BigchainDB cluster of three (3) nodes tagged `wrigley` on AWS:
```text
# in a Python 2.5-2.7 virtual environment where fabric, boto3, etc. are installed
cd bigchaindb
cd deploy-cluster-aws
./startup.sh wrigley 3 pypi
```
The `pypi` on the end means that it will install the latest (stable) `bigchaindb` package from the [Python Package Index (PyPI)](https://pypi.python.org/pypi). That is, on each node, BigchainDB is installed using `pip install bigchaindb`.
`startup.sh` is a Bash script which calls some Python and Fabric scripts. The usage is:
```text
@@ -101,20 +120,7 @@ The `pypi` on the end means that it will install the latest (stable) `bigchaindb
The first two arguments are self-explanatory. The third argument can be `pypi` or the name of a local Git branch (e.g. `master` or `feat/3752/quote-asimov-on-tuesdays`). If you don't include a third argument, then `pypi` will be assumed by default.
Here's what the `startup.sh` script does; it:
0. allocates more elastic IP addresses if necessary,
1. launches the specified number of nodes (instances) on Amazon EC2,
2. tags them with the specified tag,
3. waits until those instances exist and are running,
4. for each instance, it associates an elastic IP address with that instance,
5. adds remote keys to `~/.ssh/known_hosts`,
6. (re)creates the RethinkDB configuration file `conf/rethinkdb.conf`,
7. installs base (prerequisite) software on all instances,
8. installs RethinkDB on all instances,
9. installs BigchainDB on all instances,
10. initializes the BigchainDB database,
11. starts BigchainDB on all instances.
If you're curious what the `startup.sh` script does, the source code has lots of explanatory comments, so it's quite easy to read. Here's a link to the latest version on GitHub: [`startup.sh`](https://github.com/bigchaindb/bigchaindb/blob/master/deploy-cluster-aws/startup.sh)
It should take a few minutes for the deployment to finish. If you run into problems, see the section on Known Deployment Issues below.