Merge pull request #291 from bigchaindb/feat/280/deploy_conf_file_for_aws_deployment_config

Feat/280/deploy conf file for aws deployment config
This commit is contained in:
Troy McConaghy 2016-05-17 12:58:25 +02:00
commit bf6298cf17
5 changed files with 190 additions and 85 deletions

View File

@ -4,38 +4,35 @@
# if any command has a non-zero exit status # if any command has a non-zero exit status
set -e set -e
USAGE="usage: ./awsdeploy.sh <number_of_nodes_in_cluster> <pypi_or_branch> <servers_or_clients>" # Check for the first command-line argument
# (the name of the AWS deployment config file)
if [ -z "$1" ]; then if [ -z "$1" ]; then
echo $USAGE # no first argument was provided
echo "No first argument was specified" echo "awsdeploy: missing file operand"
echo "It should be a number like 3 or 15" echo "Usage: awsdeploy DEPLOY_CONF_FILE"
exit 1 echo "Deploy BigchainDB on AWS using the specified AWS deployment configuration file"
else
NUM_NODES=$1
fi
if [ -z "$2" ]; then
echo $USAGE
echo "No second argument was specified, so BigchainDB will be installed from PyPI"
BRANCH="pypi"
else
BRANCH=$2
fi
if [ -z "$3" ]; then
echo $USAGE
echo "No third argument was specified, so servers will be deployed"
WHAT_TO_DEPLOY="servers"
else
WHAT_TO_DEPLOY=$3
fi
if [[ ("$WHAT_TO_DEPLOY" != "servers") && ("$WHAT_TO_DEPLOY" != "clients") ]]; then
echo "The third argument, if included, must be servers or clients"
exit 1 exit 1
fi fi
DEPLOY_CONF_FILE=$1
# Check to make sure DEPLOY_CONF_FILE exists
if [ ! -f "$DEPLOY_CONF_FILE" ]; then
echo "AWS deployment configuration file not found: "$DEPLOY_CONF_FILE
exit 1
fi
# Read DEPLOY_CONF_FILE
# to set environment variables related to AWS deployment
echo "Reading "$DEPLOY_CONF_FILE
source $DEPLOY_CONF_FILE
echo "NUM_NODES = "$NUM_NODES
echo "BRANCH = "$BRANCH
echo "WHAT_TO_DEPLOY = "$WHAT_TO_DEPLOY
echo "USE_KEYPAIRS_FILE = "$USE_KEYPAIRS_FILE
echo "IMAGE_ID = "$IMAGE_ID
echo "INSTANCE_TYPE = "$INSTANCE_TYPE
# Check for AWS private key file (.pem file) # Check for AWS private key file (.pem file)
if [ ! -f "pem/bigchaindb.pem" ]; then if [ ! -f "pem/bigchaindb.pem" ]; then
echo "File pem/bigchaindb.pem (AWS private key) is missing" echo "File pem/bigchaindb.pem (AWS private key) is missing"
@ -49,6 +46,21 @@ if [ ! -d "confiles" ]; then
exit 1 exit 1
fi fi
# Check if NUM_NODES got set
if [ -z "$NUM_NODES" ]; then
echo "NUM_NODES is not set in the AWS deployment configuration file "$DEPLOY_CONF_FILE
exit 1
fi
# Check if the number of files in confiles directory == NUM_NODES
CONFILES_COUNT=`ls confiles | wc -l`
if [[ $CONFILES_COUNT != $NUM_NODES ]]; then
echo "ERROR: CONFILES_COUNT = "$CONFILES_COUNT
echo "but NUM_NODES = "$NUM_NODES
echo "so there should be "$NUM_NODES" files in the confiles directory"
exit 1
fi
# Auto-generate the tag to apply to all nodes in the cluster # Auto-generate the tag to apply to all nodes in the cluster
TAG="BDB-"$WHAT_TO_DEPLOY"-"`date +%m-%d@%H:%M` TAG="BDB-"$WHAT_TO_DEPLOY"-"`date +%m-%d@%H:%M`
echo "TAG = "$TAG echo "TAG = "$TAG
@ -67,7 +79,7 @@ chmod 0400 pem/bigchaindb.pem
# 5. writes the shellscript add2known_hosts.sh # 5. writes the shellscript add2known_hosts.sh
# 6. (over)writes a file named hostlist.py # 6. (over)writes a file named hostlist.py
# containing a list of all public DNS names. # containing a list of all public DNS names.
python launch_ec2_nodes.py --tag $TAG --nodes $NUM_NODES python launch_ec2_nodes.py --deploy-conf-file $DEPLOY_CONF_FILE --tag $TAG
# Make add2known_hosts.sh executable then execute it. # Make add2known_hosts.sh executable then execute it.
# This adds remote keys to ~/.ssh/known_hosts # This adds remote keys to ~/.ssh/known_hosts
@ -117,7 +129,11 @@ if [ "$WHAT_TO_DEPLOY" == "servers" ]; then
# Transform the config files in the confiles directory # Transform the config files in the confiles directory
# to have proper keyrings, api_endpoint values, etc. # to have proper keyrings, api_endpoint values, etc.
python clusterize_confiles.py confiles $NUM_NODES if [ "$USE_KEYPAIRS_FILE" == "True" ]; then
python clusterize_confiles.py -k confiles $NUM_NODES
else
python clusterize_confiles.py confiles $NUM_NODES
fi
# Send one of the config files to each instance # Send one of the config files to each instance
for (( HOST=0 ; HOST<$NUM_NODES ; HOST++ )); do for (( HOST=0 ; HOST<$NUM_NODES ; HOST++ )); do

View File

@ -67,7 +67,7 @@ if use_keypairs:
# Make a list containing all the public keys # Make a list containing all the public keys
if use_keypairs: if use_keypairs:
print('Using keypairs from keypairs.py') print('Using keypairs from keypairs.py')
pubkeys = [keypair[1] for keypair in keypairs_list] pubkeys = [keypair[1] for keypair in keypairs_list[:num_files]]
else: else:
# read the pubkeys from the config files in conf_dir # read the pubkeys from the config files in conf_dir
pubkeys = [] pubkeys = []

View File

@ -0,0 +1,51 @@
# AWS deployment config file
# To use in a Bash shell script:
# source example_deploy_conf.py
# # $EXAMPLEVAR now has a value
# To use in a Python script:
# from example_deploy_conf import *
# or
# import importlib
# cf = importlib.import_module('example_deploy_conf')
# # cf.EXAMPLEVAR now has a value
# DON'T PUT SPACES AROUND THE =
# because that would confuse Bash.
# Example values: "string in double quotes", 32, True, False
# NUM_NODES is the number of nodes to deploy
NUM_NODES=3
# BRANCH is either "pypi" or the name of a local Git branch
# (e.g. "master" or "feat/3627/optional-delimiter-in-txfile")
# It's where to get the BigchainDB code to be deployed on the nodes
BRANCH="master"
# WHAT_TO_DEPLOY is either "servers" or "clients"
# What do you want to deploy?
WHAT_TO_DEPLOY="servers"
# USE_KEYPAIRS_FILE is either True or False
# Should node keypairs be read from keypairs.py?
# (If False, then the keypairs will be whatever is in the
# BigchainDB config files in the confiles directory.)
USE_KEYPAIRS_FILE=False
# IMAGE_ID is the Amazon Machine Image (AMI) id to use
# in all the servers/instances to be launched.
# Examples:
# "ami-accff2b1" = An Ubuntu 14.04.2 LTS "Ubuntu Cloud image" from Canonical
# 64-bit, hvm-ssd, published to eu-central-1
# See http://tinyurl.com/hkjhg46
# "ami-596b7235" = Ubuntu with IOPS storage? Does this work?
#
# See http://cloud-images.ubuntu.com/releases/14.04/release-20150325/
IMAGE_ID="ami-accff2b1"
# INSTANCE_TYPE is the type of AWS instance to launch
# i.e. How many CPUs do you want? How much storage? etc.
# Examples: "m3.2xlarge", "c3.8xlarge", "c4.8xlarge"
# For all options, see https://aws.amazon.com/ec2/instance-types/
INSTANCE_TYPE="m3.2xlarge"

View File

@ -16,12 +16,22 @@ import sys
import time import time
import socket import socket
import argparse import argparse
import importlib
import botocore import botocore
import boto3 import boto3
from awscommon import get_naeips from awscommon import get_naeips
# First, ensure they're using Python 2.5-2.7 SETTINGS = ['NUM_NODES', 'BRANCH', 'WHAT_TO_DEPLOY', 'USE_KEYPAIRS_FILE',
'IMAGE_ID', 'INSTANCE_TYPE']
class SettingsTypeError(TypeError):
pass
# Ensure they're using Python 2.5-2.7
pyver = sys.version_info pyver = sys.version_info
major = pyver[0] major = pyver[0]
minor = pyver[1] minor = pyver[1]
@ -36,14 +46,54 @@ parser = argparse.ArgumentParser()
parser.add_argument("--tag", parser.add_argument("--tag",
help="tag to add to all launched instances on AWS", help="tag to add to all launched instances on AWS",
required=True) required=True)
parser.add_argument("--nodes", parser.add_argument("--deploy-conf-file",
help="number of nodes in the cluster", help="AWS deployment configuration file",
required=True, required=True)
type=int)
args = parser.parse_args() args = parser.parse_args()
tag = args.tag tag = args.tag
num_nodes = int(args.nodes) deploy_conf_file = args.deploy_conf_file
# Import all the variables set in the AWS deployment configuration file
# (Remove the '.py' from the end of deploy_conf_file.)
cf = importlib.import_module(deploy_conf_file[:-3])
dir_cf = dir(cf) # = a list of the attributes of cf
for setting in SETTINGS:
if setting not in dir_cf:
sys.exit('{} was not set '.format(setting) +
'in the specified AWS deployment '
'configuration file {}'.format(deploy_conf_file))
exec('{0} = cf.{0}'.format(setting))
# Validate the variables set in the AWS deployment configuration file
if not isinstance(NUM_NODES, int):
raise SettingsTypeError('NUM_NODES should be an int')
if not isinstance(BRANCH, str):
raise SettingsTypeError('BRANCH should be a string')
if not isinstance(WHAT_TO_DEPLOY, str):
raise SettingsTypeError('WHAT_TO_DEPLOY should be a string')
if not isinstance(USE_KEYPAIRS_FILE, bool):
msg = 'USE_KEYPAIRS_FILE should a boolean (True or False)'
raise SettingsTypeError(msg)
if not isinstance(IMAGE_ID, str):
raise SettingsTypeError('IMAGE_ID should be a string')
if not isinstance(INSTANCE_TYPE, str):
raise SettingsTypeError('INSTANCE_TYPE should be a string')
if NUM_NODES > 64:
raise ValueError('NUM_NODES should be less than or equal to 64. '
'The AWS deployment configuration file sets it to {}'.
format(NUM_NODES))
if WHAT_TO_DEPLOY not in ['servers', 'clients']:
raise ValueError('WHAT_TO_DEPLOY should be either "servers" or "clients". '
'The AWS deployment configuration file sets it to {}'.
format(WHAT_TO_DEPLOY))
# Get an AWS EC2 "resource" # Get an AWS EC2 "resource"
# See http://boto3.readthedocs.org/en/latest/guide/resources.html # See http://boto3.readthedocs.org/en/latest/guide/resources.html
@ -81,10 +131,10 @@ print('You have {} allocated elastic IPs which are '
'not already associated with instances'. 'not already associated with instances'.
format(len(non_associated_eips))) format(len(non_associated_eips)))
if num_nodes > len(non_associated_eips): if NUM_NODES > len(non_associated_eips):
num_eips_to_allocate = num_nodes - len(non_associated_eips) num_eips_to_allocate = NUM_NODES - len(non_associated_eips)
print('You want to launch {} instances'. print('You want to launch {} instances'.
format(num_nodes)) format(NUM_NODES))
print('so {} more elastic IPs must be allocated'. print('so {} more elastic IPs must be allocated'.
format(num_eips_to_allocate)) format(num_eips_to_allocate))
for _ in range(num_eips_to_allocate): for _ in range(num_eips_to_allocate):
@ -103,22 +153,19 @@ if num_nodes > len(non_associated_eips):
raise raise
print('Commencing launch of {} instances on Amazon EC2...'. print('Commencing launch of {} instances on Amazon EC2...'.
format(num_nodes)) format(NUM_NODES))
for _ in range(num_nodes): for _ in range(NUM_NODES):
# Request the launch of one instance at a time # Request the launch of one instance at a time
# (so list_of_instances should contain only one item) # (so list_of_instances should contain only one item)
list_of_instances = ec2.create_instances( list_of_instances = ec2.create_instances(
ImageId='ami-accff2b1', # ubuntu-image ImageId=IMAGE_ID,
# 'ami-596b7235', # ubuntu w/ iops storage MinCount=1,
MinCount=1, MaxCount=1,
MaxCount=1, KeyName='bigchaindb',
KeyName='bigchaindb', InstanceType=INSTANCE_TYPE,
InstanceType='m3.2xlarge', SecurityGroupIds=['bigchaindb']
# 'c3.8xlarge', )
# 'c4.8xlarge',
SecurityGroupIds=['bigchaindb']
)
# Tag the just-launched instances (should be just one) # Tag the just-launched instances (should be just one)
for instance in list_of_instances: for instance in list_of_instances:

View File

@ -131,7 +131,7 @@ To configure a BigchainDB node to send monitoring data to the monitoring server,
### Step 1 ### Step 1
Suppose _N_ is the number of nodes you want in your BigchainDB cluster. If you already have a set of _N_ BigchainDB configuration files in the `deploy-cluster-aws/confiles` directory, then you can jump to step 2. To create such a set, you can do something like: Suppose _N_ is the number of nodes you want in your BigchainDB cluster. If you already have a set of _N_ BigchainDB configuration files in the `deploy-cluster-aws/confiles` directory, then you can jump to the next step. To create such a set, you can do something like:
```text ```text
# in a Python 3 virtual environment where bigchaindb is installed # in a Python 3 virtual environment where bigchaindb is installed
cd bigchaindb cd bigchaindb
@ -141,11 +141,23 @@ cd deploy-cluster-aws
That will create three (3) _default_ BigchainDB configuration files in the `deploy-cluster-aws/confiles` directory (which will be created if it doesn't already exist). The three files will be named `bcdb_conf0`, `bcdb_conf1`, and `bcdb_conf2`. That will create three (3) _default_ BigchainDB configuration files in the `deploy-cluster-aws/confiles` directory (which will be created if it doesn't already exist). The three files will be named `bcdb_conf0`, `bcdb_conf1`, and `bcdb_conf2`.
You can look inside those files if you're curious. In step 2, they'll be modified. For example, the default keyring is an empty list. In step 2, the deployment script automatically changes the keyring of each node to be a list of the public keys of all other nodes. Other changes are also made. You can look inside those files if you're curious. For example, the default keyring is an empty list. Later, the deployment script automatically changes the keyring of each node to be a list of the public keys of all other nodes. Other changes are also made. That is, the configuration files generated in this step are _not_ what will be sent to the deployed nodes; they're just a starting point.
**An Aside on Using a Standard Set of Keypairs** ### Step 2
It's possible to deploy BigchainDB servers with a known set of keypairs. You can generate a set of keypairs in a file named `keypairs.py` using the `write_keypairs_file.py` script. For example: Step 2 is to make an AWS deployment configuration file, if necessary. There's an example AWS configuration file named `example_deploy_conf.py`. It has many comments explaining each setting. The settings in that file are (or should be):
```text
NUM_NODES=3
BRANCH="master"
WHAT_TO_DEPLOY="servers"
USE_KEYPAIRS_FILE=False
IMAGE_ID="ami-accff2b1"
INSTANCE_TYPE="m3.2xlarge"
```
If you're happy with those settings, then you can skip to the next step. Otherwise, you could make a copy of `example_deploy_conf.py` (e.g. `cp example_deploy_conf.py my_deploy_conf.py`) and then edit the copy using a text editor.
If you want your nodes to have a predictable set of pre-generated keypairs, then you should 1) set `USE_KEYPAIRS_FILE=True` in the AWS deployment configuration file, and 2) provide a `keypairs.py` file containing enough keypairs for all of your nodes. You can generate a `keypairs.py` file using the `write_keypairs_file.py` script. For example:
```text ```text
# in a Python 3 virtual environment where bigchaindb is installed # in a Python 3 virtual environment where bigchaindb is installed
cd bigchaindb cd bigchaindb
@ -153,45 +165,24 @@ cd deploy-cluster-aws
python3 write_keypairs_file.py 100 python3 write_keypairs_file.py 100
``` ```
The above command generates a file with 100 keypairs. (You can generate more keypairs than you need, so you can use the same list over and over again, for different numbers of servers.) To make the `awsdeploy.sh` script read all keys from `keypairs.py`, you must _edit_ the `awsdeploy.sh` script: change the line that says `python clusterize_confiles.py confiles $NUM_NODES` to `python clusterize_confiles.py -k confiles $NUM_NODES` (i.e. add the `-k` option). The above command generates a `keypairs.py` file with 100 keypairs. You can generate more keypairs than you need, so you can use the same list over and over again, for different numbers of servers. The deployment scripts will only use the first NUM_NODES keypairs.
### Step 2 ### Step 3
Step 2 is to launch the nodes ("instances") on AWS, to install all the necessary software on them, configure the software, run the software, and more. Step 3 is to launch the nodes ("instances") on AWS, to install all the necessary software on them, configure the software, run the software, and more. Here's how you'd do that:
Here's an example of how one could launch a BigchainDB cluster of three (3) nodes on AWS:
```text ```text
# in a Python 2.5-2.7 virtual environment where fabric, boto3, etc. are installed # in a Python 2.5-2.7 virtual environment where fabric, boto3, etc. are installed
cd bigchaindb cd bigchaindb
cd deploy-cluster-aws cd deploy-cluster-aws
./awsdeploy.sh 3 ./awsdeploy.sh my_deploy_conf.py
# Only if you want to start BigchainDB on all the nodes:
fab start_bigchaindb fab start_bigchaindb
``` ```
`awsdeploy.sh` is a Bash script which calls some Python and Fabric scripts. The usage is: `awsdeploy.sh` is a Bash script which calls some Python and Fabric scripts. If you're curious what it does, [the source code](https://github.com/bigchaindb/bigchaindb/blob/master/deploy-cluster-aws/awsdeploy.sh) has many explanatory comments.
```text
./awsdeploy.sh <number_of_nodes_in_cluster> [pypi_or_branch] [servers_or_clients]
```
**<number_of_nodes_in_cluster>** (Required) It should take a few minutes for the deployment to finish. If you run into problems, see the section on **Known Deployment Issues** below.
The number of nodes you want to deploy. Example value: 5
**[pypi_or_branch]** (Optional)
Where the nodes should get their BigchainDB source code. If it's `pypi`, then BigchainDB will be installed from the latest `bigchaindb` package in the [Python Package Index (PyPI)](https://pypi.python.org/pypi). That is, on each node, BigchainDB will be installed using `pip install bigchaindb`. You can also put the name of a local Git branch; it will be compressed and sent out to all the nodes for installation. If you don't include the second argument, then the default is `pypi`.
**[servers_or_clients]** (Optional)
If you want to deploy BigchainDB servers, then the third argument should be `servers`.
If you want to deploy BigchainDB clients, then the third argument should be `clients`.
The third argument is optional, but if you want to include it, you must also include the second argument. If you don't include the third argument, then the default is `servers`.
---
If you're curious what the `awsdeploy.sh` script does, [the source code](https://github.com/bigchaindb/bigchaindb/blob/master/deploy-cluster-aws/awsdeploy.sh) has lots of explanatory comments, so it's quite easy to read.
It should take a few minutes for the deployment to finish. If you run into problems, see the section on Known Deployment Issues below.
The EC2 Console has a section where you can see all the instances you have running on EC2. You can `ssh` into a running instance using a command like: The EC2 Console has a section where you can see all the instances you have running on EC2. You can `ssh` into a running instance using a command like:
```text ```text