Merge pull request #469 from bigchaindb/feat/461/deploy-cluster-on-aws-using-ebs

Added ability to deploy test cluster on AWS using EBS for storage
Troy McConaghy 2016-07-28 14:48:34 +02:00 committed by GitHub
commit b3f464de86
6 changed files with 177 additions and 60 deletions

View File

@@ -32,6 +32,11 @@ echo "WHAT_TO_DEPLOY = "$WHAT_TO_DEPLOY
echo "USE_KEYPAIRS_FILE = "$USE_KEYPAIRS_FILE
echo "IMAGE_ID = "$IMAGE_ID
echo "INSTANCE_TYPE = "$INSTANCE_TYPE
echo "USING_EBS = "$USING_EBS
if [ "$USING_EBS" = True ]; then
echo "EBS_VOLUME_SIZE = "$EBS_VOLUME_SIZE
echo "EBS_OPTIMIZED = "$EBS_OPTIMIZED
fi
# Check for AWS private key file (.pem file)
if [ ! -f "pem/bigchaindb.pem" ]; then
@@ -95,8 +100,12 @@ fab upgrade_setuptools
if [ "$WHAT_TO_DEPLOY" == "servers" ]; then
# (Re)create the RethinkDB configuration file conf/rethinkdb.conf
python create_rethinkdb_conf.py
# Rollout storage backend (RethinkDB) and start it
# Rollout RethinkDB and start it
fab prep_rethinkdb_storage:$USING_EBS
fab install_rethinkdb
fab configure_rethinkdb
fab delete_rethinkdb_data
fab start_rethinkdb
fi
# Rollout BigchainDB (but don't start it yet)
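One detail worth noting about the `fab prep_rethinkdb_storage:$USING_EBS` call above: Fabric passes command-line task arguments as strings, so the task receives `'True'` or `'False'` rather than a boolean, and converts it itself (see `prep_rethinkdb_storage()` in fabfile.py below). A minimal sketch of that conversion pattern, with an illustrative helper name:

```python
def parse_bool(arg):
    # Fabric delivers task arguments as strings, e.g.
    # `fab prep_rethinkdb_storage:True` passes the string 'True'
    return str(arg).lower() == 'true'

# parse_bool('True') -> True, parse_bool('False') -> False
```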
@@ -148,6 +157,8 @@ if [ "$WHAT_TO_DEPLOY" == "servers" ]; then
# definition of init_bigchaindb() in fabfile.py to see why.
fab init_bigchaindb
fab set_shards:$NUM_NODES
echo "To set the replication factor to 3, do: fab set_replicas:3"
echo "To start BigchainDB on all the nodes, do: fab start_bigchaindb"
else
# Deploying clients
# The only thing to configure on clients is the api_endpoint

View File

@@ -18,7 +18,7 @@
# NUM_NODES is the number of nodes to deploy
NUM_NODES=3
# PYPI_OR_BRANCH is either "pypi" or the name of a local Git branch
# BRANCH is either "pypi" or the name of a local Git branch
# (e.g. "master" or "feat/3627/optional-delimiter-in-txfile")
# It's where to get the BigchainDB code to be deployed on the nodes
BRANCH="master"
@@ -49,3 +49,19 @@ IMAGE_ID="ami-accff2b1"
# Examples: "m3.2xlarge", "c3.8xlarge", "c4.8xlarge"
# For all options, see https://aws.amazon.com/ec2/instance-types/
INSTANCE_TYPE="m3.2xlarge"
# USING_EBS is True if you want to attach an Amazon EBS volume
USING_EBS=False
# EBS_VOLUME_SIZE is the size of the EBS volume to attach, in GiB
# Since we assume 'gp2' volumes (for now), the possible range is 1 to 16384
# If USING_EBS=False, EBS_VOLUME_SIZE is irrelevant and not used
EBS_VOLUME_SIZE=30
# EBS_OPTIMIZED is True or False, depending on whether you want
# EBS-optimized instances. See:
# https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/EBSOptimized.html
# Not all instance types support EBS optimization.
# Setting EBS_OPTIMIZED=True may cost more, but not always.
# If USING_EBS=False, EBS_OPTIMIZED is irrelevant and not used
EBS_OPTIMIZED=False
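For example, a deployment that attaches a 100 GiB gp2 volume to EBS-optimized instances would use settings like these (the values are illustrative, not recommendations):

```python
USING_EBS=True
EBS_VOLUME_SIZE=100
EBS_OPTIMIZED=True
```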

View File

@@ -97,45 +97,94 @@ def upgrade_setuptools():
sudo('pip3 install --upgrade setuptools')
# Install RethinkDB
# Prepare RethinkDB storage
@task
@parallel
def install_rethinkdb():
"""Installation of RethinkDB"""
with settings(warn_only=True):
# preparing filesystem
sudo("mkdir -p /data")
# Locally mounted storage (m3.2xlarge, but also c3.xxx)
def prep_rethinkdb_storage(USING_EBS):
"""Prepare RethinkDB storage"""
# Convert USING_EBS from a string to a bool
USING_EBS = (USING_EBS.lower() == 'true')
# Make the /data directory for RethinkDB data
sudo("mkdir -p /data")
# OLD: with settings(warn_only=True):
if USING_EBS: # on /dev/xvdp
# See https://tinyurl.com/h2nut68
sudo("mkfs -t ext4 /dev/xvdp")
sudo("mount /dev/xvdp /data")
# To mount this EBS volume on every system reboot,
# add an entry for the device to the /etc/fstab file.
# First, make a copy of the current /etc/fstab file
sudo("cp /etc/fstab /etc/fstab.orig")
# Append a line to /etc/fstab
sudo("echo '/dev/xvdp /data ext4 defaults,nofail,nobootwait 0 2' >> /etc/fstab")
# Verify the /etc/fstab file. If something is wrong with it,
# then this should produce an error:
sudo("mount -a")
# Set the I/O scheduler for /dev/xvdp to deadline
with settings(sudo_user='root'):
sudo("echo deadline > /sys/block/xvdp/queue/scheduler")
else: # not using EBS.
# Using the "instance store" that comes with the instance.
# If the instance store comes with more than one volume,
# this only mounts ONE of them: /dev/xvdb
# For example, m3.2xlarge instances have /dev/xvdb and /dev/xvdc
# and /mnt is mounted on /dev/xvdb by default.
try:
sudo("umount /mnt")
sudo("mkfs -t ext4 /dev/xvdb")
sudo("mount /dev/xvdb /data")
except:
pass
# persist settings to fstab
sudo("rm -rf /etc/fstab")
sudo("echo 'LABEL=cloudimg-rootfs / ext4 defaults,discard 0 0' >> /etc/fstab")
sudo("echo '/dev/xvdb /data ext4 defaults,noatime 0 0' >> /etc/fstab")
# activate deadline scheduler
# Set the I/O scheduler for /dev/xvdb to deadline
with settings(sudo_user='root'):
sudo("echo deadline > /sys/block/xvdb/queue/scheduler")
# install rethinkdb
sudo("echo 'deb http://download.rethinkdb.com/apt trusty main' | sudo tee /etc/apt/sources.list.d/rethinkdb.list")
sudo("wget -qO- http://download.rethinkdb.com/apt/pubkey.gpg | sudo apt-key add -")
sudo("apt-get update")
sudo("apt-get -y install rethinkdb")
# change fs to user
sudo('chown -R rethinkdb:rethinkdb /data')
# copy config file to target system
put('conf/rethinkdb.conf',
'/etc/rethinkdb/instances.d/instance1.conf',
mode=0600,
use_sudo=True)
# initialize data-dir
sudo('rm -rf /data/*')
# finally restart instance
sudo('/etc/init.d/rethinkdb restart')
# Install RethinkDB
@task
@parallel
def install_rethinkdb():
"""Install RethinkDB"""
sudo("echo 'deb http://download.rethinkdb.com/apt trusty main' | sudo tee /etc/apt/sources.list.d/rethinkdb.list")
sudo("wget -qO- http://download.rethinkdb.com/apt/pubkey.gpg | sudo apt-key add -")
sudo("apt-get update")
sudo("apt-get -y install rethinkdb")
# Change owner:group of the RethinkDB data directory to rethinkdb:rethinkdb
sudo('chown -R rethinkdb:rethinkdb /data')
# Configure RethinkDB
@task
@parallel
def configure_rethinkdb():
"""Copy the RethinkDB config file to the remote host"""
put('conf/rethinkdb.conf',
'/etc/rethinkdb/instances.d/instance1.conf',
mode=0600,
use_sudo=True)
# Delete RethinkDB data
@task
@parallel
def delete_rethinkdb_data():
"""Delete the contents of the RethinkDB /data directory
but not the directory itself.
"""
sudo('rm -rf /data/*')
# Start RethinkDB
@task
@parallel
def start_rethinkdb():
"""Start RethinkDB"""
sudo('/etc/init.d/rethinkdb restart')
# Install BigchainDB from PyPI
@@ -197,13 +246,20 @@ def init_bigchaindb():
run('bigchaindb init', pty=False)
# Set the number of shards (in the backlog and bigchain tables)
# Set the number of shards (in all tables)
@task
@hosts(public_dns_names[0])
def set_shards(num_shards):
run('bigchaindb set-shards {}'.format(num_shards))
# Set the number of replicas (in all tables)
@task
@hosts(public_dns_names[0])
def set_replicas(num_replicas):
run('bigchaindb set-replicas {}'.format(num_replicas))
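Both of these tasks run against a single host (the first public DNS name) and take their count as a Fabric-style argument, so after a deploy they can be invoked like this (the counts shown are just examples):

```bash
fab set_shards:3      # set the number of shards (in all tables) to 3
fab set_replicas:3    # set the replication factor (in all tables) to 3
```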
# Start BigchainDB using screen
@task
@parallel

View File

@@ -24,7 +24,8 @@ from awscommon import get_naeips
SETTINGS = ['NUM_NODES', 'BRANCH', 'WHAT_TO_DEPLOY', 'USE_KEYPAIRS_FILE',
'IMAGE_ID', 'INSTANCE_TYPE']
'IMAGE_ID', 'INSTANCE_TYPE', 'USING_EBS', 'EBS_VOLUME_SIZE',
'EBS_OPTIMIZED']
class SettingsTypeError(TypeError):
@@ -76,7 +77,7 @@ if not isinstance(WHAT_TO_DEPLOY, str):
raise SettingsTypeError('WHAT_TO_DEPLOY should be a string')
if not isinstance(USE_KEYPAIRS_FILE, bool):
msg = 'USE_KEYPAIRS_FILE should a boolean (True or False)'
msg = 'USE_KEYPAIRS_FILE should be a boolean (True or False)'
raise SettingsTypeError(msg)
if not isinstance(IMAGE_ID, str):
@@ -85,6 +86,15 @@ if not isinstance(IMAGE_ID, str):
if not isinstance(INSTANCE_TYPE, str):
raise SettingsTypeError('INSTANCE_TYPE should be a string')
if not isinstance(USING_EBS, bool):
raise SettingsTypeError('USING_EBS should be a boolean (True or False)')
if not isinstance(EBS_VOLUME_SIZE, int):
raise SettingsTypeError('EBS_VOLUME_SIZE should be an int')
if not isinstance(EBS_OPTIMIZED, bool):
raise SettingsTypeError('EBS_OPTIMIZED should be a boolean (True or False)')
if NUM_NODES > 64:
raise ValueError('NUM_NODES should be less than or equal to 64. '
'The AWS deployment configuration file sets it to {}'.
@@ -95,6 +105,12 @@ if WHAT_TO_DEPLOY not in ['servers', 'clients']:
'The AWS deployment configuration file sets it to {}'.
format(WHAT_TO_DEPLOY))
# Since we assume 'gp2' volumes (for now), the possible range is 1 to 16384
if EBS_VOLUME_SIZE > 16384:
raise ValueError('EBS_VOLUME_SIZE should be <= 16384. '
'The AWS deployment configuration file sets it to {}'.
format(EBS_VOLUME_SIZE))
# Get an AWS EC2 "resource"
# See http://boto3.readthedocs.org/en/latest/guide/resources.html
ec2 = boto3.resource(service_name='ec2')
@@ -158,14 +174,40 @@ print('Commencing launch of {} instances on Amazon EC2...'.
for _ in range(NUM_NODES):
# Request the launch of one instance at a time
# (so list_of_instances should contain only one item)
list_of_instances = ec2.create_instances(
ImageId=IMAGE_ID,
MinCount=1,
MaxCount=1,
KeyName='bigchaindb',
InstanceType=INSTANCE_TYPE,
SecurityGroupIds=['bigchaindb']
)
# See https://tinyurl.com/hbjewbb
if USING_EBS:
dm = {
'DeviceName': '/dev/sdp',
# Why /dev/sdp? See https://tinyurl.com/z2zqm6n
'Ebs': {
'VolumeSize': EBS_VOLUME_SIZE, # GiB
'DeleteOnTermination': False,
'VolumeType': 'gp2',
'Encrypted': False
},
# 'NoDevice': 'device'
# Suppresses the specified device included
# in the block device mapping of the AMI.
}
list_of_instances = ec2.create_instances(
ImageId=IMAGE_ID,
MinCount=1,
MaxCount=1,
KeyName='bigchaindb',
InstanceType=INSTANCE_TYPE,
SecurityGroupIds=['bigchaindb'],
BlockDeviceMappings=[dm],
EbsOptimized=EBS_OPTIMIZED
)
else: # not USING_EBS
list_of_instances = ec2.create_instances(
ImageId=IMAGE_ID,
MinCount=1,
MaxCount=1,
KeyName='bigchaindb',
InstanceType=INSTANCE_TYPE,
SecurityGroupIds=['bigchaindb']
)
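The two `create_instances()` calls differ only in the EBS-related keyword arguments; an equivalent formulation (a sketch, not what the script actually does) would build a shared keyword dict and add the EBS arguments conditionally:

```python
params = dict(
    ImageId=IMAGE_ID,
    MinCount=1,
    MaxCount=1,
    KeyName='bigchaindb',
    InstanceType=INSTANCE_TYPE,
    SecurityGroupIds=['bigchaindb'],
)
if USING_EBS:
    params['BlockDeviceMappings'] = [dm]
    params['EbsOptimized'] = EBS_OPTIMIZED
list_of_instances = ec2.create_instances(**params)
```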
# Tag the just-launched instances (should be just one)
for instance in list_of_instances:

View File

@@ -1,32 +1,19 @@
# Example RethinkDB Storage Setups
## Example 1: A Partition of an AWS Instance Store
## Example Amazon EC2 Setups
Many [AWS EC2 instance types](https://aws.amazon.com/ec2/instance-types/) come with an [instance store](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/InstanceStorage.html): temporary storage that disappears when the instance disappears. The size and setup of an instance store depend on the EC2 instance type.
We have some scripts for [deploying a _test_ BigchainDB cluster on AWS](../clusters-feds/deploy-on-aws.html). Those scripts include command sequences to set up storage for RethinkDB.
In particular, look in the file [/deploy-cluster-aws/fabfile.py](https://github.com/bigchaindb/bigchaindb/blob/master/deploy-cluster-aws/fabfile.py), under `def prep_rethinkdb_storage(USING_EBS)`. Note that there are two cases:
We have some scripts for [deploying a _test_ BigchainDB cluster on AWS](../clusters-feds/deploy-on-aws.html). Those scripts include commands to set up a partition (`/dev/xvdb`) on an instance store for RethinkDB data. Those commands can be found in the file `/deploy-cluster-aws/fabfile.py`, under `def install_rethinkdb()` (i.e. the Fabric function to install RethinkDB).
1. **Using EBS ([Amazon Elastic Block Store](https://aws.amazon.com/ebs/)).** This is always an option, and for some instance types ("EBS-only"), it's the only option.
2. **Using an "instance store" volume provided with an Amazon EC2 instance.** Note that our scripts only use one of the (possibly many) volumes in the instance store.
An AWS instance store is convenient, but it's intended for "buffers, caches, scratch data, and other temporary content." Moreover:
There's some explanation of the steps in the [Amazon EC2 documentation about making an Amazon EBS volume available for use](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-using-volumes.html).
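In outline, the commands our Fabric script runs for the EBS case (assuming the volume shows up as `/dev/xvdp`) are roughly:

```bash
sudo mkfs -t ext4 /dev/xvdp   # create an ext4 filesystem on the EBS volume
sudo mount /dev/xvdp /data    # mount it at /data, where RethinkDB keeps its data
# make the mount persist across reboots, then verify the new /etc/fstab entry
echo '/dev/xvdp /data ext4 defaults,nofail,nobootwait 0 2' | sudo tee -a /etc/fstab
sudo mount -a
```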
* You pay for all the storage, regardless of how much you use.
* You can't increase the size of the instance store.
* If the instance stops, terminates, or reboots, you lose the associated instance store.
* Instance store data isn't replicated, so if the underlying disk drive fails, you lose the data in the instance store.
* "You can't detach an instance store volume from one instance and attach it to a different instance."
The [AWS documentation says](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/InstanceStorage.html), "...do not rely on instance store for valuable, long-term data. Instead, you can build a degree of redundancy (for example, RAID 1/5/6), or use a file system (for example, HDFS and MapR-FS) that supports redundancy and fault tolerance."
**Even if you don't use an AWS instance store partition to store your node's RethinkDB data, you may find it useful to read the steps in `def install_rethinkdb()`: [see fabfile.py](https://github.com/bigchaindb/bigchaindb/blob/master/deploy-cluster-aws/fabfile.py).**
You shouldn't use an EC2 "instance store" to store RethinkDB data for a production node, because it's not replicated and it's only intended for temporary, ephemeral data. If the associated instance crashes, is stopped, or is terminated, the data in the instance store is lost forever. Amazon EBS storage is replicated, has incremental snapshots, and is low-latency.
## Example 2: An Amazon EBS Volume
TODO
Note: Amazon EBS volumes are always replicated.
## Example 3: Using Amazon EFS
## Example Using Amazon EFS
TODO

View File

@@ -103,6 +103,9 @@ WHAT_TO_DEPLOY="servers"
USE_KEYPAIRS_FILE=False
IMAGE_ID="ami-accff2b1"
INSTANCE_TYPE="m3.2xlarge"
USING_EBS=False
EBS_VOLUME_SIZE=30
EBS_OPTIMIZED=False
```
If you're happy with those settings, then you can skip to the next step. Otherwise, you could make a copy of `example_deploy_conf.py` (e.g. `cp example_deploy_conf.py my_deploy_conf.py`) and then edit the copy using a text editor.
@@ -126,6 +129,8 @@ Step 3 is to launch the nodes ("instances") on AWS, to install all the necessary
cd bigchaindb
cd deploy-cluster-aws
./awsdeploy.sh my_deploy_conf.py
# Only if you want to set the replication factor to 3
fab set_replicas:3
# Only if you want to start BigchainDB on all the nodes:
fab start_bigchaindb
```