Merge pull request #291 from bigchaindb/feat/280/deploy_conf_file_for_aws_deployment_config

Feat/280/deploy conf file for aws deployment config
2024-10-13 13:34:05 +00:00 · 2016-05-17 12:58:25 +02:00 · 2016-05-17 12:58:25 +02:00 · bf6298cf17
commit bf6298cf17
parent e5e243b116 8b52b81e58
5 changed files with 190 additions and 85 deletions
--- a/deploy-cluster-aws/awsdeploy.sh
+++ b/deploy-cluster-aws/awsdeploy.sh
@ -4,38 +4,35 @@
 # if any command has a non-zero exit status
 set -e
-USAGE="usage: ./awsdeploy.sh <number_of_nodes_in_cluster> <pypi_or_branch> <servers_or_clients>"
+# Check for the first command-line argument
-
+# (the name of the AWS deployment config file)
 if [ -z "$1" ]; then
-    echo $USAGE
+    # no first argument was provided
-    echo "No first argument was specified"
+    echo "awsdeploy: missing file operand"
-    echo "It should be a number like 3 or 15"
+    echo "Usage: awsdeploy DEPLOY_CONF_FILE"
-    exit 1
+    echo "Deploy BigchainDB on AWS using the specified AWS deployment configuration file"
 else
    NUM_NODES=$1
 fi
 if [ -z "$2" ]; then
    echo $USAGE
    echo "No second argument was specified, so BigchainDB will be installed from PyPI"
    BRANCH="pypi"
 else
    BRANCH=$2
 fi
 if [ -z "$3" ]; then
    echo $USAGE
    echo "No third argument was specified, so servers will be deployed"
    WHAT_TO_DEPLOY="servers"
 else
    WHAT_TO_DEPLOY=$3
 fi
 if [[ ("$WHAT_TO_DEPLOY" != "servers") && ("$WHAT_TO_DEPLOY" != "clients") ]]; then
    echo "The third argument, if included, must be servers or clients"
    exit 1
 fi
 DEPLOY_CONF_FILE=$1
 # Check to make sure DEPLOY_CONF_FILE exists
 if [ ! -f "$DEPLOY_CONF_FILE" ]; then
    echo "AWS deployment configuration file not found: "$DEPLOY_CONF_FILE
    exit 1
 fi
 # Read DEPLOY_CONF_FILE
 # to set environment variables related to AWS deployment
 echo "Reading "$DEPLOY_CONF_FILE
 source $DEPLOY_CONF_FILE
 echo "NUM_NODES = "$NUM_NODES
 echo "BRANCH = "$BRANCH
 echo "WHAT_TO_DEPLOY = "$WHAT_TO_DEPLOY
 echo "USE_KEYPAIRS_FILE = "$USE_KEYPAIRS_FILE
 echo "IMAGE_ID = "$IMAGE_ID
 echo "INSTANCE_TYPE = "$INSTANCE_TYPE
 # Check for AWS private key file (.pem file)
 if [ ! -f "pem/bigchaindb.pem" ]; then
    echo "File pem/bigchaindb.pem (AWS private key) is missing"
@ -49,6 +46,21 @@ if [ ! -d "confiles" ]; then
    exit 1
 fi
 # Check if NUM_NODES got set
 if [ -z "$NUM_NODES" ]; then
    echo "NUM_NODES is not set in the AWS deployment configuration file "$DEPLOY_CONF_FILE
    exit 1
 fi
 # Check if the number of files in confiles directory == NUM_NODES
 CONFILES_COUNT=`ls confiles | wc -l`
 if [[ $CONFILES_COUNT != $NUM_NODES ]]; then
    echo "ERROR: CONFILES_COUNT = "$CONFILES_COUNT
    echo "but NUM_NODES = "$NUM_NODES
    echo "so there should be "$NUM_NODES" files in the confiles directory" 
    exit 1
 fi
 # Auto-generate the tag to apply to all nodes in the cluster
 TAG="BDB-"$WHAT_TO_DEPLOY"-"`date +%m-%d@%H:%M`
 echo "TAG = "$TAG
@ -67,7 +79,7 @@ chmod 0400 pem/bigchaindb.pem
 # 5. writes the shellscript add2known_hosts.sh
 # 6. (over)writes a file named hostlist.py
 #    containing a list of all public DNS names.
-python launch_ec2_nodes.py --tag $TAG --nodes $NUM_NODES
+python launch_ec2_nodes.py --deploy-conf-file $DEPLOY_CONF_FILE --tag $TAG
 # Make add2known_hosts.sh executable then execute it.
 # This adds remote keys to ~/.ssh/known_hosts
@ -117,7 +129,11 @@ if [ "$WHAT_TO_DEPLOY" == "servers" ]; then
    # Transform the config files in the confiles directory
    # to have proper keyrings, api_endpoint values, etc.
-    python clusterize_confiles.py confiles $NUM_NODES
+    if [ "$USE_KEYPAIRS_FILE" == "True" ]; then
        python clusterize_confiles.py -k confiles $NUM_NODES
    else
        python clusterize_confiles.py confiles $NUM_NODES
    fi
    # Send one of the config files to each instance
    for (( HOST=0 ; HOST<$NUM_NODES ; HOST++ )); do
--- a/deploy-cluster-aws/clusterize_confiles.py
+++ b/deploy-cluster-aws/clusterize_confiles.py
@ -67,7 +67,7 @@ if use_keypairs:
 # Make a list containing all the public keys
 if use_keypairs:
    print('Using keypairs from keypairs.py')
-    pubkeys = [keypair[1] for keypair in keypairs_list]
+    pubkeys = [keypair[1] for keypair in keypairs_list[:num_files]]
 else:
    # read the pubkeys from the config files in conf_dir
    pubkeys = []
--- a/deploy-cluster-aws/example_deploy_conf.py
+++ b/deploy-cluster-aws/example_deploy_conf.py
@ -0,0 +1,51 @@
 # AWS deployment config file
 # To use in a Bash shell script:
 # source example_deploy_conf.py
 # # $EXAMPLEVAR now has a value
 # To use in a Python script:
 # from example_deploy_conf import *
 # or
 # import importlib
 # cf = importlib.import_module('example_deploy_conf')
 # # cf.EXAMPLEVAR now has a value
 # DON'T PUT SPACES AROUND THE =
 # because that would confuse Bash.
 # Example values: "string in double quotes", 32, True, False
 # NUM_NODES is the number of nodes to deploy
 NUM_NODES=3
 # BRANCH is either "pypi" or the name of a local Git branch
 # (e.g. "master" or "feat/3627/optional-delimiter-in-txfile")
 # It's where to get the BigchainDB code to be deployed on the nodes
 BRANCH="master"
 # WHAT_TO_DEPLOY is either "servers" or "clients"
 # What do you want to deploy?
 WHAT_TO_DEPLOY="servers"
 # USE_KEYPAIRS_FILE is either True or False
 # Should node keypairs be read from keypairs.py?
 # (If False, then the keypairs will be whatever is in the
 #  BigchainDB config files in the confiles directory.)
 USE_KEYPAIRS_FILE=False
 # IMAGE_ID is the Amazon Machine Image (AMI) id to use
 # in all the servers/instances to be launched.
 # Examples:
 # "ami-accff2b1" = An Ubuntu 14.04.2 LTS "Ubuntu Cloud image" from Canonical
 #                  64-bit, hvm-ssd, published to eu-central-1
 #                  See http://tinyurl.com/hkjhg46
 # "ami-596b7235" = Ubuntu with IOPS storage? Does this work?
 #
 # See http://cloud-images.ubuntu.com/releases/14.04/release-20150325/
 IMAGE_ID="ami-accff2b1"
 # INSTANCE_TYPE is the type of AWS instance to launch
 # i.e. How many CPUs do you want? How much storage? etc.
 # Examples: "m3.2xlarge", "c3.8xlarge", "c4.8xlarge"
 # For all options, see https://aws.amazon.com/ec2/instance-types/
 INSTANCE_TYPE="m3.2xlarge"
--- a/deploy-cluster-aws/launch_ec2_nodes.py
+++ b/deploy-cluster-aws/launch_ec2_nodes.py
@ -16,12 +16,22 @@ import sys
 import time
 import socket
 import argparse
 import importlib
 import botocore
 import boto3
 from awscommon import get_naeips
-# First, ensure they're using Python 2.5-2.7
+SETTINGS = ['NUM_NODES', 'BRANCH', 'WHAT_TO_DEPLOY', 'USE_KEYPAIRS_FILE',
            'IMAGE_ID', 'INSTANCE_TYPE']
 class SettingsTypeError(TypeError):
    pass
 # Ensure they're using Python 2.5-2.7
 pyver = sys.version_info
 major = pyver[0]
 minor = pyver[1]
@ -36,14 +46,54 @@ parser = argparse.ArgumentParser()
 parser.add_argument("--tag",
                    help="tag to add to all launched instances on AWS",
                    required=True)
-parser.add_argument("--nodes",
+parser.add_argument("--deploy-conf-file",
-                    help="number of nodes in the cluster",
+                    help="AWS deployment configuration file",
-                    required=True,
+                    required=True)
                    type=int)
 args = parser.parse_args()
 tag = args.tag
-num_nodes = int(args.nodes)
+deploy_conf_file = args.deploy_conf_file
 # Import all the variables set in the AWS deployment configuration file
 # (Remove the '.py' from the end of deploy_conf_file.)
 cf = importlib.import_module(deploy_conf_file[:-3])
 dir_cf = dir(cf)  # = a list of the attributes of cf
 for setting in SETTINGS:
    if setting not in dir_cf:
        sys.exit('{} was not set '.format(setting) +
                 'in the specified AWS deployment '
                 'configuration file {}'.format(deploy_conf_file))
    exec('{0} = cf.{0}'.format(setting))
 # Validate the variables set in the AWS deployment configuration file
 if not isinstance(NUM_NODES, int):
    raise SettingsTypeError('NUM_NODES should be an int')
 if not isinstance(BRANCH, str):
    raise SettingsTypeError('BRANCH should be a string')
 if not isinstance(WHAT_TO_DEPLOY, str):
    raise SettingsTypeError('WHAT_TO_DEPLOY should be a string')
 if not isinstance(USE_KEYPAIRS_FILE, bool):
    msg = 'USE_KEYPAIRS_FILE should a boolean (True or False)'
    raise SettingsTypeError(msg)
 if not isinstance(IMAGE_ID, str):
    raise SettingsTypeError('IMAGE_ID should be a string')
 if not isinstance(INSTANCE_TYPE, str):
    raise SettingsTypeError('INSTANCE_TYPE should be a string')
 if NUM_NODES > 64:
    raise ValueError('NUM_NODES should be less than or equal to 64. '
                     'The AWS deployment configuration file sets it to {}'.
                     format(NUM_NODES))
 if WHAT_TO_DEPLOY not in ['servers', 'clients']:
    raise ValueError('WHAT_TO_DEPLOY should be either "servers" or "clients". '
                     'The AWS deployment configuration file sets it to {}'.
                     format(WHAT_TO_DEPLOY))
 # Get an AWS EC2 "resource"
 # See http://boto3.readthedocs.org/en/latest/guide/resources.html
@ -81,10 +131,10 @@ print('You have {} allocated elastic IPs which are '
      'not already associated with instances'.
      format(len(non_associated_eips)))
-if num_nodes > len(non_associated_eips):
+if NUM_NODES > len(non_associated_eips):
-    num_eips_to_allocate = num_nodes - len(non_associated_eips)
+    num_eips_to_allocate = NUM_NODES - len(non_associated_eips)
    print('You want to launch {} instances'.
-          format(num_nodes))
+          format(NUM_NODES))
    print('so {} more elastic IPs must be allocated'.
          format(num_eips_to_allocate))
    for _ in range(num_eips_to_allocate):
@ -103,22 +153,19 @@ if num_nodes > len(non_associated_eips):
            raise
 print('Commencing launch of {} instances on Amazon EC2...'.
-      format(num_nodes))
+      format(NUM_NODES))
-for _ in range(num_nodes):
+for _ in range(NUM_NODES):
    # Request the launch of one instance at a time
    # (so list_of_instances should contain only one item)
    list_of_instances = ec2.create_instances(
-            ImageId='ami-accff2b1',          # ubuntu-image
+        ImageId=IMAGE_ID,
-            # 'ami-596b7235',                 # ubuntu w/ iops storage
+        MinCount=1,
-            MinCount=1,
+        MaxCount=1,
-            MaxCount=1,
+        KeyName='bigchaindb',
-            KeyName='bigchaindb',
+        InstanceType=INSTANCE_TYPE,
-            InstanceType='m3.2xlarge',
+        SecurityGroupIds=['bigchaindb']
-            # 'c3.8xlarge',
+    )
            # 'c4.8xlarge',
            SecurityGroupIds=['bigchaindb']
            )
    # Tag the just-launched instances (should be just one)
    for instance in list_of_instances:
--- a/docs/source/deploy-on-aws.md
+++ b/docs/source/deploy-on-aws.md
@ -131,7 +131,7 @@ To configure a BigchainDB node to send monitoring data to the monitoring server,
 ### Step 1
-Suppose _N_ is the number of nodes you want in your BigchainDB cluster. If you already have a set of _N_ BigchainDB configuration files in the `deploy-cluster-aws/confiles` directory, then you can jump to step 2. To create such a set, you can do something like:
+Suppose _N_ is the number of nodes you want in your BigchainDB cluster. If you already have a set of _N_ BigchainDB configuration files in the `deploy-cluster-aws/confiles` directory, then you can jump to the next step. To create such a set, you can do something like:
 ```text
 # in a Python 3 virtual environment where bigchaindb is installed
 cd bigchaindb
@ -141,11 +141,23 @@ cd deploy-cluster-aws
 That will create three (3) _default_ BigchainDB configuration files in the `deploy-cluster-aws/confiles` directory (which will be created if it doesn't already exist). The three files will be named `bcdb_conf0`, `bcdb_conf1`, and `bcdb_conf2`.
-You can look inside those files if you're curious. In step 2, they'll be modified. For example, the default keyring is an empty list. In step 2, the deployment script automatically changes the keyring of each node to be a list of the public keys of all other nodes. Other changes are also made.
+You can look inside those files if you're curious. For example, the default keyring is an empty list. Later, the deployment script automatically changes the keyring of each node to be a list of the public keys of all other nodes. Other changes are also made. That is, the configuration files generated in this step are _not_ what will be sent to the deployed nodes; they're just a starting point.
-**An Aside on Using a Standard Set of Keypairs**
+### Step 2
-It's possible to deploy BigchainDB servers with a known set of keypairs. You can generate a set of keypairs in a file named `keypairs.py` using the `write_keypairs_file.py` script. For example:
+Step 2 is to make an AWS deployment configuration file, if necessary. There's an example AWS deployment configuration file named `example_deploy_conf.py`. It has many comments explaining each setting. The settings in that file are (or should be):
 ```text
 NUM_NODES=3
 BRANCH="master"
 WHAT_TO_DEPLOY="servers"
 USE_KEYPAIRS_FILE=False
 IMAGE_ID="ami-accff2b1"
 INSTANCE_TYPE="m3.2xlarge"
 ```
 If you're happy with those settings, then you can skip to the next step. Otherwise, you could make a copy of `example_deploy_conf.py` (e.g. `cp example_deploy_conf.py my_deploy_conf.py`) and then edit the copy using a text editor.
 If you want your nodes to have a predictable set of pre-generated keypairs, then you should 1) set `USE_KEYPAIRS_FILE=True` in the AWS deployment configuration file, and 2) provide a `keypairs.py` file containing enough keypairs for all of your nodes. You can generate a `keypairs.py` file using the `write_keypairs_file.py` script. For example:
 ```text
 # in a Python 3 virtual environment where bigchaindb is installed
 cd bigchaindb
@ -153,45 +165,24 @@ cd deploy-cluster-aws
 python3 write_keypairs_file.py 100
 ```
-The above command generates a file with 100 keypairs. (You can generate more keypairs than you need, so you can use the same list over and over again, for different numbers of servers.) To make the `awsdeploy.sh` script read all keys from `keypairs.py`, you must _edit_ the `awsdeploy.sh` script: change the line that says `python clusterize_confiles.py confiles $NUM_NODES` to `python clusterize_confiles.py -k confiles $NUM_NODES` (i.e. add the `-k` option).
+The above command generates a `keypairs.py` file with 100 keypairs. You can generate more keypairs than you need, so you can use the same list over and over again, for different numbers of servers. The deployment scripts will only use the first NUM_NODES keypairs.
-### Step 2
+### Step 3
-Step 2 is to launch the nodes ("instances") on AWS, to install all the necessary software on them, configure the software, run the software, and more.
+Step 3 is to launch the nodes ("instances") on AWS, to install all the necessary software on them, configure the software, run the software, and more. Here's how you'd do that:
 Here's an example of how one could launch a BigchainDB cluster of three (3) nodes on AWS:
 ```text
 # in a Python 2.5-2.7 virtual environment where fabric, boto3, etc. are installed
 cd bigchaindb
 cd deploy-cluster-aws
-./awsdeploy.sh 3
+./awsdeploy.sh my_deploy_conf.py
 # Only if you want to start BigchainDB on all the nodes:
 fab start_bigchaindb
 ```
-`awsdeploy.sh` is a Bash script which calls some Python and Fabric scripts. The usage is:
+`awsdeploy.sh` is a Bash script which calls some Python and Fabric scripts. If you're curious what it does, [the source code](https://github.com/bigchaindb/bigchaindb/blob/master/deploy-cluster-aws/awsdeploy.sh) has many explanatory comments.
 ```text
 ./awsdeploy.sh <number_of_nodes_in_cluster> [pypi_or_branch] [servers_or_clients]
 ```
-**<number_of_nodes_in_cluster>** (Required)
+It should take a few minutes for the deployment to finish. If you run into problems, see the section on **Known Deployment Issues** below.
 The number of nodes you want to deploy. Example value: 5
 **[pypi_or_branch]** (Optional)
 Where the nodes should get their BigchainDB source code. If it's `pypi`, then BigchainDB will be installed from the latest `bigchaindb` package in the [Python Package Index (PyPI)](https://pypi.python.org/pypi). That is, on each node, BigchainDB will be installed using `pip install bigchaindb`. You can also put the name of a local Git branch; it will be compressed and sent out to all the nodes for installation. If you don't include the second argument, then the default is `pypi`.
 **[servers_or_clients]** (Optional)
 If you want to deploy BigchainDB servers, then the third argument should be `servers`.
 If you want to deploy BigchainDB clients, then the third argument should be `clients`.
 The third argument is optional, but if you want to include it, you must also include the second argument. If you don't include the third argument, then the default is `servers`.
 ---
 If you're curious what the `awsdeploy.sh` script does, [the source code](https://github.com/bigchaindb/bigchaindb/blob/master/deploy-cluster-aws/awsdeploy.sh) has lots of explanatory comments, so it's quite easy to read.
 It should take a few minutes for the deployment to finish. If you run into problems, see the section on Known Deployment Issues below.
 The EC2 Console has a section where you can see all the instances you have running on EC2. You can `ssh` into a running instance using a command like:
 ```text