diff --git a/deploy-cluster-aws/awsdeploy.sh b/deploy-cluster-aws/awsdeploy.sh index 6290d01e..06c8574b 100755 --- a/deploy-cluster-aws/awsdeploy.sh +++ b/deploy-cluster-aws/awsdeploy.sh @@ -4,38 +4,35 @@ # if any command has a non-zero exit status set -e -USAGE="usage: ./awsdeploy.sh " - +# Check for the first command-line argument +# (the name of the AWS deployment config file) if [ -z "$1" ]; then - echo $USAGE - echo "No first argument was specified" - echo "It should be a number like 3 or 15" - exit 1 -else - NUM_NODES=$1 -fi - -if [ -z "$2" ]; then - echo $USAGE - echo "No second argument was specified, so BigchainDB will be installed from PyPI" - BRANCH="pypi" -else - BRANCH=$2 -fi - -if [ -z "$3" ]; then - echo $USAGE - echo "No third argument was specified, so servers will be deployed" - WHAT_TO_DEPLOY="servers" -else - WHAT_TO_DEPLOY=$3 -fi - -if [[ ("$WHAT_TO_DEPLOY" != "servers") && ("$WHAT_TO_DEPLOY" != "clients") ]]; then - echo "The third argument, if included, must be servers or clients" + # no first argument was provided + echo "awsdeploy: missing file operand" + echo "Usage: awsdeploy DEPLOY_CONF_FILE" + echo "Deploy BigchainDB on AWS using the specified AWS deployment configuration file" exit 1 fi +DEPLOY_CONF_FILE=$1 + +# Check to make sure DEPLOY_CONF_FILE exists +if [ ! -f "$DEPLOY_CONF_FILE" ]; then + echo "AWS deployment configuration file not found: "$DEPLOY_CONF_FILE + exit 1 +fi + +# Read DEPLOY_CONF_FILE +# to set environment variables related to AWS deployment +echo "Reading "$DEPLOY_CONF_FILE +source $DEPLOY_CONF_FILE +echo "NUM_NODES = "$NUM_NODES +echo "BRANCH = "$BRANCH +echo "WHAT_TO_DEPLOY = "$WHAT_TO_DEPLOY +echo "USE_KEYPAIRS_FILE = "$USE_KEYPAIRS_FILE +echo "IMAGE_ID = "$IMAGE_ID +echo "INSTANCE_TYPE = "$INSTANCE_TYPE + # Check for AWS private key file (.pem file) if [ ! -f "pem/bigchaindb.pem" ]; then echo "File pem/bigchaindb.pem (AWS private key) is missing" @@ -49,6 +46,21 @@ if [ ! 
-d "confiles" ]; then exit 1 fi +# Check if NUM_NODES got set +if [ -z "$NUM_NODES" ]; then + echo "NUM_NODES is not set in the AWS deployment configuration file "$DEPLOY_CONF_FILE + exit 1 +fi + +# Check if the number of files in confiles directory == NUM_NODES +CONFILES_COUNT=`ls confiles | wc -l` +if [[ $CONFILES_COUNT != $NUM_NODES ]]; then + echo "ERROR: CONFILES_COUNT = "$CONFILES_COUNT + echo "but NUM_NODES = "$NUM_NODES + echo "so there should be "$NUM_NODES" files in the confiles directory" + exit 1 +fi + # Auto-generate the tag to apply to all nodes in the cluster TAG="BDB-"$WHAT_TO_DEPLOY"-"`date +%m-%d@%H:%M` echo "TAG = "$TAG @@ -67,7 +79,7 @@ chmod 0400 pem/bigchaindb.pem # 5. writes the shellscript add2known_hosts.sh # 6. (over)writes a file named hostlist.py # containing a list of all public DNS names. -python launch_ec2_nodes.py --tag $TAG --nodes $NUM_NODES +python launch_ec2_nodes.py --deploy-conf-file $DEPLOY_CONF_FILE --tag $TAG # Make add2known_hosts.sh executable then execute it. # This adds remote keys to ~/.ssh/known_hosts @@ -117,7 +129,11 @@ if [ "$WHAT_TO_DEPLOY" == "servers" ]; then # Transform the config files in the confiles directory # to have proper keyrings, api_endpoint values, etc. 
- python clusterize_confiles.py confiles $NUM_NODES + if [ "$USE_KEYPAIRS_FILE" == "True" ]; then + python clusterize_confiles.py -k confiles $NUM_NODES + else + python clusterize_confiles.py confiles $NUM_NODES + fi # Send one of the config files to each instance for (( HOST=0 ; HOST<$NUM_NODES ; HOST++ )); do diff --git a/deploy-cluster-aws/clusterize_confiles.py b/deploy-cluster-aws/clusterize_confiles.py index d1fcb6ee..f266fe75 100644 --- a/deploy-cluster-aws/clusterize_confiles.py +++ b/deploy-cluster-aws/clusterize_confiles.py @@ -67,7 +67,7 @@ if use_keypairs: # Make a list containing all the public keys if use_keypairs: print('Using keypairs from keypairs.py') - pubkeys = [keypair[1] for keypair in keypairs_list] + pubkeys = [keypair[1] for keypair in keypairs_list[:num_files]] else: # read the pubkeys from the config files in conf_dir pubkeys = [] diff --git a/deploy-cluster-aws/example_deploy_conf.py b/deploy-cluster-aws/example_deploy_conf.py new file mode 100644 index 00000000..994e14b5 --- /dev/null +++ b/deploy-cluster-aws/example_deploy_conf.py @@ -0,0 +1,51 @@ +# AWS deployment config file + +# To use in a Bash shell script: +# source example_deploy_conf.py +# # $EXAMPLEVAR now has a value + +# To use in a Python script: +# from example_deploy_conf import * +# or +# import importlib +# cf = importlib.import_module('example_deploy_conf') +# # cf.EXAMPLEVAR now has a value + +# DON'T PUT SPACES AROUND THE = +# because that would confuse Bash. +# Example values: "string in double quotes", 32, True, False + +# NUM_NODES is the number of nodes to deploy +NUM_NODES=3 + +# BRANCH is either "pypi" or the name of a local Git branch +# (e.g. "master" or "feat/3627/optional-delimiter-in-txfile") +# It's where to get the BigchainDB code to be deployed on the nodes +BRANCH="master" + +# WHAT_TO_DEPLOY is either "servers" or "clients" +# What do you want to deploy? 
+WHAT_TO_DEPLOY="servers" + +# USE_KEYPAIRS_FILE is either True or False +# Should node keypairs be read from keypairs.py? +# (If False, then the keypairs will be whatever is in the +# BigchainDB config files in the confiles directory.) +USE_KEYPAIRS_FILE=False + +# IMAGE_ID is the Amazon Machine Image (AMI) id to use +# in all the servers/instances to be launched. +# Examples: +# "ami-accff2b1" = An Ubuntu 14.04.2 LTS "Ubuntu Cloud image" from Canonical +# 64-bit, hvm-ssd, published to eu-central-1 +# See http://tinyurl.com/hkjhg46 +# "ami-596b7235" = Ubuntu with IOPS storage? Does this work? +# +# See http://cloud-images.ubuntu.com/releases/14.04/release-20150325/ +IMAGE_ID="ami-accff2b1" + +# INSTANCE_TYPE is the type of AWS instance to launch +# i.e. How many CPUs do you want? How much storage? etc. +# Examples: "m3.2xlarge", "c3.8xlarge", "c4.8xlarge" +# For all options, see https://aws.amazon.com/ec2/instance-types/ +INSTANCE_TYPE="m3.2xlarge" diff --git a/deploy-cluster-aws/launch_ec2_nodes.py b/deploy-cluster-aws/launch_ec2_nodes.py index 2196114c..c5c6c5f1 100644 --- a/deploy-cluster-aws/launch_ec2_nodes.py +++ b/deploy-cluster-aws/launch_ec2_nodes.py @@ -16,12 +16,22 @@ import sys import time import socket import argparse +import importlib import botocore import boto3 + from awscommon import get_naeips -# First, ensure they're using Python 2.5-2.7 +SETTINGS = ['NUM_NODES', 'BRANCH', 'WHAT_TO_DEPLOY', 'USE_KEYPAIRS_FILE', + 'IMAGE_ID', 'INSTANCE_TYPE'] + + +class SettingsTypeError(TypeError): + pass + + +# Ensure they're using Python 2.5-2.7 pyver = sys.version_info major = pyver[0] minor = pyver[1] @@ -36,14 +46,54 @@ parser = argparse.ArgumentParser() parser.add_argument("--tag", help="tag to add to all launched instances on AWS", required=True) -parser.add_argument("--nodes", - help="number of nodes in the cluster", - required=True, - type=int) +parser.add_argument("--deploy-conf-file", + help="AWS deployment configuration file", + required=True) 
args = parser.parse_args() - tag = args.tag -num_nodes = int(args.nodes) +deploy_conf_file = args.deploy_conf_file + +# Import all the variables set in the AWS deployment configuration file +# (Remove the '.py' from the end of deploy_conf_file.) +cf = importlib.import_module(deploy_conf_file[:-3]) + +dir_cf = dir(cf) # = a list of the attributes of cf +for setting in SETTINGS: + if setting not in dir_cf: + sys.exit('{} was not set '.format(setting) + + 'in the specified AWS deployment ' + 'configuration file {}'.format(deploy_conf_file)) + exec('{0} = cf.{0}'.format(setting)) + +# Validate the variables set in the AWS deployment configuration file +if not isinstance(NUM_NODES, int): + raise SettingsTypeError('NUM_NODES should be an int') + +if not isinstance(BRANCH, str): + raise SettingsTypeError('BRANCH should be a string') + +if not isinstance(WHAT_TO_DEPLOY, str): + raise SettingsTypeError('WHAT_TO_DEPLOY should be a string') + +if not isinstance(USE_KEYPAIRS_FILE, bool): + msg = 'USE_KEYPAIRS_FILE should a boolean (True or False)' + raise SettingsTypeError(msg) + +if not isinstance(IMAGE_ID, str): + raise SettingsTypeError('IMAGE_ID should be a string') + +if not isinstance(INSTANCE_TYPE, str): + raise SettingsTypeError('INSTANCE_TYPE should be a string') + +if NUM_NODES > 64: + raise ValueError('NUM_NODES should be less than or equal to 64. ' + 'The AWS deployment configuration file sets it to {}'. + format(NUM_NODES)) + +if WHAT_TO_DEPLOY not in ['servers', 'clients']: + raise ValueError('WHAT_TO_DEPLOY should be either "servers" or "clients". ' + 'The AWS deployment configuration file sets it to {}'. + format(WHAT_TO_DEPLOY)) # Get an AWS EC2 "resource" # See http://boto3.readthedocs.org/en/latest/guide/resources.html @@ -81,10 +131,10 @@ print('You have {} allocated elastic IPs which are ' 'not already associated with instances'. 
format(len(non_associated_eips))) -if num_nodes > len(non_associated_eips): - num_eips_to_allocate = num_nodes - len(non_associated_eips) +if NUM_NODES > len(non_associated_eips): + num_eips_to_allocate = NUM_NODES - len(non_associated_eips) print('You want to launch {} instances'. - format(num_nodes)) + format(NUM_NODES)) print('so {} more elastic IPs must be allocated'. format(num_eips_to_allocate)) for _ in range(num_eips_to_allocate): @@ -103,22 +153,19 @@ if num_nodes > len(non_associated_eips): raise print('Commencing launch of {} instances on Amazon EC2...'. - format(num_nodes)) + format(NUM_NODES)) -for _ in range(num_nodes): +for _ in range(NUM_NODES): # Request the launch of one instance at a time # (so list_of_instances should contain only one item) list_of_instances = ec2.create_instances( - ImageId='ami-accff2b1', # ubuntu-image - # 'ami-596b7235', # ubuntu w/ iops storage - MinCount=1, - MaxCount=1, - KeyName='bigchaindb', - InstanceType='m3.2xlarge', - # 'c3.8xlarge', - # 'c4.8xlarge', - SecurityGroupIds=['bigchaindb'] - ) + ImageId=IMAGE_ID, + MinCount=1, + MaxCount=1, + KeyName='bigchaindb', + InstanceType=INSTANCE_TYPE, + SecurityGroupIds=['bigchaindb'] + ) # Tag the just-launched instances (should be just one) for instance in list_of_instances: diff --git a/docs/source/deploy-on-aws.md b/docs/source/deploy-on-aws.md index 8fd9b840..fb0442a3 100644 --- a/docs/source/deploy-on-aws.md +++ b/docs/source/deploy-on-aws.md @@ -131,7 +131,7 @@ To configure a BigchainDB node to send monitoring data to the monitoring server, ### Step 1 -Suppose _N_ is the number of nodes you want in your BigchainDB cluster. If you already have a set of _N_ BigchainDB configuration files in the `deploy-cluster-aws/confiles` directory, then you can jump to step 2. To create such a set, you can do something like: +Suppose _N_ is the number of nodes you want in your BigchainDB cluster. 
If you already have a set of _N_ BigchainDB configuration files in the `deploy-cluster-aws/confiles` directory, then you can jump to the next step. To create such a set, you can do something like: ```text # in a Python 3 virtual environment where bigchaindb is installed cd bigchaindb @@ -141,11 +141,23 @@ cd deploy-cluster-aws That will create three (3) _default_ BigchainDB configuration files in the `deploy-cluster-aws/confiles` directory (which will be created if it doesn't already exist). The three files will be named `bcdb_conf0`, `bcdb_conf1`, and `bcdb_conf2`. -You can look inside those files if you're curious. In step 2, they'll be modified. For example, the default keyring is an empty list. In step 2, the deployment script automatically changes the keyring of each node to be a list of the public keys of all other nodes. Other changes are also made. +You can look inside those files if you're curious. For example, the default keyring is an empty list. Later, the deployment script automatically changes the keyring of each node to be a list of the public keys of all other nodes. Other changes are also made. That is, the configuration files generated in this step are _not_ what will be sent to the deployed nodes; they're just a starting point. -**An Aside on Using a Standard Set of Keypairs** +### Step 2 -It's possible to deploy BigchainDB servers with a known set of keypairs. You can generate a set of keypairs in a file named `keypairs.py` using the `write_keypairs_file.py` script. For example: +Step 2 is to make an AWS deployment configuration file, if necessary. There's an example AWS deployment configuration file named `example_deploy_conf.py`. It has many comments explaining each setting. The settings in that file are (or should be): +```text +NUM_NODES=3 +BRANCH="master" +WHAT_TO_DEPLOY="servers" +USE_KEYPAIRS_FILE=False +IMAGE_ID="ami-accff2b1" +INSTANCE_TYPE="m3.2xlarge" +``` + +If you're happy with those settings, then you can skip to the next step. 
Otherwise, you could make a copy of `example_deploy_conf.py` (e.g. `cp example_deploy_conf.py my_deploy_conf.py`) and then edit the copy using a text editor. + +If you want your nodes to have a predictable set of pre-generated keypairs, then you should 1) set `USE_KEYPAIRS_FILE=True` in the AWS deployment configuration file, and 2) provide a `keypairs.py` file containing enough keypairs for all of your nodes. You can generate a `keypairs.py` file using the `write_keypairs_file.py` script. For example: ```text # in a Python 3 virtual environment where bigchaindb is installed cd bigchaindb @@ -153,45 +165,24 @@ cd deploy-cluster-aws python3 write_keypairs_file.py 100 ``` -The above command generates a file with 100 keypairs. (You can generate more keypairs than you need, so you can use the same list over and over again, for different numbers of servers.) To make the `awsdeploy.sh` script read all keys from `keypairs.py`, you must _edit_ the `awsdeploy.sh` script: change the line that says `python clusterize_confiles.py confiles $NUM_NODES` to `python clusterize_confiles.py -k confiles $NUM_NODES` (i.e. add the `-k` option). +The above command generates a `keypairs.py` file with 100 keypairs. You can generate more keypairs than you need, so you can use the same list over and over again, for different numbers of servers. The deployment scripts will only use the first NUM_NODES keypairs. -### Step 2 +### Step 3 -Step 2 is to launch the nodes ("instances") on AWS, to install all the necessary software on them, configure the software, run the software, and more. +Step 3 is to launch the nodes ("instances") on AWS, to install all the necessary software on them, configure the software, run the software, and more. Here's how you'd do that: -Here's an example of how one could launch a BigchainDB cluster of three (3) nodes on AWS: ```text # in a Python 2.5-2.7 virtual environment where fabric, boto3, etc. 
are installed cd bigchaindb cd deploy-cluster-aws -./awsdeploy.sh 3 +./awsdeploy.sh my_deploy_conf.py +# Only if you want to start BigchainDB on all the nodes: fab start_bigchaindb ``` -`awsdeploy.sh` is a Bash script which calls some Python and Fabric scripts. The usage is: -```text -./awsdeploy.sh [pypi_or_branch] [servers_or_clients] -``` +`awsdeploy.sh` is a Bash script which calls some Python and Fabric scripts. If you're curious what it does, [the source code](https://github.com/bigchaindb/bigchaindb/blob/master/deploy-cluster-aws/awsdeploy.sh) has many explanatory comments. -**** (Required) - -The number of nodes you want to deploy. Example value: 5 - -**[pypi_or_branch]** (Optional) - -Where the nodes should get their BigchainDB source code. If it's `pypi`, then BigchainDB will be installed from the latest `bigchaindb` package in the [Python Package Index (PyPI)](https://pypi.python.org/pypi). That is, on each node, BigchainDB will be installed using `pip install bigchaindb`. You can also put the name of a local Git branch; it will be compressed and sent out to all the nodes for installation. If you don't include the second argument, then the default is `pypi`. - -**[servers_or_clients]** (Optional) - -If you want to deploy BigchainDB servers, then the third argument should be `servers`. -If you want to deploy BigchainDB clients, then the third argument should be `clients`. -The third argument is optional, but if you want to include it, you must also include the second argument. If you don't include the third argument, then the default is `servers`. - ---- - -If you're curious what the `awsdeploy.sh` script does, [the source code](https://github.com/bigchaindb/bigchaindb/blob/master/deploy-cluster-aws/awsdeploy.sh) has lots of explanatory comments, so it's quite easy to read. - -It should take a few minutes for the deployment to finish. If you run into problems, see the section on Known Deployment Issues below. 
+It should take a few minutes for the deployment to finish. If you run into problems, see the section on **Known Deployment Issues** below. The EC2 Console has a section where you can see all the instances you have running on EC2. You can `ssh` into a running instance using a command like: ```text