mirror of
https://github.com/bigchaindb/bigchaindb.git
synced 2024-10-13 13:34:05 +00:00
Merge pull request #469 from bigchaindb/feat/461/deploy-cluster-on-aws-using-ebs
Added ability to deploy test cluster on AWS using EBS for storage
This commit is contained in:
commit
b3f464de86
@ -32,6 +32,11 @@ echo "WHAT_TO_DEPLOY = "$WHAT_TO_DEPLOY
|
||||
echo "USE_KEYPAIRS_FILE = "$USE_KEYPAIRS_FILE
|
||||
echo "IMAGE_ID = "$IMAGE_ID
|
||||
echo "INSTANCE_TYPE = "$INSTANCE_TYPE
|
||||
echo "USING_EBS = "$USING_EBS
|
||||
if [ "$USING_EBS" = True ]; then
|
||||
echo "EBS_VOLUME_SIZE = "$EBS_VOLUME_SIZE
|
||||
echo "EBS_OPTIMIZED = "$EBS_OPTIMIZED
|
||||
fi
|
||||
|
||||
# Check for AWS private key file (.pem file)
|
||||
if [ ! -f "pem/bigchaindb.pem" ]; then
|
||||
@ -95,8 +100,12 @@ fab upgrade_setuptools
|
||||
if [ "$WHAT_TO_DEPLOY" == "servers" ]; then
|
||||
# (Re)create the RethinkDB configuration file conf/rethinkdb.conf
|
||||
python create_rethinkdb_conf.py
|
||||
# Rollout storage backend (RethinkDB) and start it
|
||||
# Rollout RethinkDB and start it
|
||||
fab prep_rethinkdb_storage:$USING_EBS
|
||||
fab install_rethinkdb
|
||||
fab configure_rethinkdb
|
||||
fab delete_rethinkdb_data
|
||||
fab start_rethinkdb
|
||||
fi
|
||||
|
||||
# Rollout BigchainDB (but don't start it yet)
|
||||
@ -148,6 +157,8 @@ if [ "$WHAT_TO_DEPLOY" == "servers" ]; then
|
||||
# definition of init_bigchaindb() in fabfile.py to see why.
|
||||
fab init_bigchaindb
|
||||
fab set_shards:$NUM_NODES
|
||||
echo "To set the replication factor to 3, do: fab set_replicas:3"
|
||||
echo "To start BigchainDB on all the nodes, do: fab start_bigchaindb"
|
||||
else
|
||||
# Deploying clients
|
||||
# The only thing to configure on clients is the api_endpoint
|
||||
|
@ -18,7 +18,7 @@
|
||||
# NUM_NODES is the number of nodes to deploy
|
||||
NUM_NODES=3
|
||||
|
||||
# PYPI_OR_BRANCH is either "pypi" or the name of a local Git branch
|
||||
# BRANCH is either "pypi" or the name of a local Git branch
|
||||
# (e.g. "master" or "feat/3627/optional-delimiter-in-txfile")
|
||||
# It's where to get the BigchainDB code to be deployed on the nodes
|
||||
BRANCH="master"
|
||||
@ -49,3 +49,19 @@ IMAGE_ID="ami-accff2b1"
|
||||
# Examples: "m3.2xlarge", "c3.8xlarge", "c4.8xlarge"
|
||||
# For all options, see https://aws.amazon.com/ec2/instance-types/
|
||||
INSTANCE_TYPE="m3.2xlarge"
|
||||
|
||||
# USING_EBS is True if you want to attach an Amazon EBS volume
|
||||
USING_EBS=False
|
||||
|
||||
# EBS_VOLUME_SIZE is the size of the EBS volume to attach, in GiB
|
||||
# Since we assume 'gp2' volumes (for now), the possible range is 1 to 16384
|
||||
# If USING_EBS=False, EBS_VOLUME_SIZE is irrelevant and not used
|
||||
EBS_VOLUME_SIZE=30
|
||||
|
||||
# EBS_OPTIMIZED is True or False, depending on whether you want
|
||||
# EBS-optimized instances. See:
|
||||
# https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/EBSOptimized.html
|
||||
# Not all instance types support EBS optimization.
|
||||
# Setting EBS_OPTIMIZED=True may cost more, but not always.
|
||||
# If USING_EBS=False, EBS_OPTIMIZED is irrelevant and not used
|
||||
EBS_OPTIMIZED=False
|
||||
|
110
deploy-cluster-aws/fabfile.py
vendored
110
deploy-cluster-aws/fabfile.py
vendored
@ -97,45 +97,94 @@ def upgrade_setuptools():
|
||||
sudo('pip3 install --upgrade setuptools')
|
||||
|
||||
|
||||
# Install RethinkDB
|
||||
# Prepare RethinkDB storage
|
||||
@task
|
||||
@parallel
|
||||
def install_rethinkdb():
|
||||
"""Installation of RethinkDB"""
|
||||
with settings(warn_only=True):
|
||||
# preparing filesystem
|
||||
sudo("mkdir -p /data")
|
||||
# Locally mounted storage (m3.2xlarge, but also c3.xxx)
|
||||
def prep_rethinkdb_storage(USING_EBS):
|
||||
"""Prepare RethinkDB storage"""
|
||||
# Convert USING_EBS from a string to a bool
|
||||
USING_EBS = (USING_EBS.lower() == 'true')
|
||||
|
||||
# Make the /data directory for RethinkDB data
|
||||
sudo("mkdir -p /data")
|
||||
|
||||
# OLD: with settings(warn_only=True):
|
||||
if USING_EBS: # on /dev/xvdp
|
||||
# See https://tinyurl.com/h2nut68
|
||||
sudo("mkfs -t ext4 /dev/xvdp")
|
||||
sudo("mount /dev/xvdp /data")
|
||||
# To mount this EBS volume on every system reboot,
|
||||
# add an entry for the device to the /etc/fstab file.
|
||||
# First, make a copy of the current /etc/fstab file
|
||||
sudo("cp /etc/fstab /etc/fstab.orig")
|
||||
# Append a line to /etc/fstab
|
||||
sudo("echo '/dev/xvdp /data ext4 defaults,nofail,nobootwait 0 2' >> /etc/fstab")
|
||||
# Verify the /etc/fstab file. If something is wrong with it,
|
||||
# then this should produce an error:
|
||||
sudo("mount -a")
|
||||
# Set the I/O scheduler for /dev/xvdp to deadline
|
||||
with settings(sudo_user='root'):
|
||||
sudo("echo deadline > /sys/block/xvdp/queue/scheduler")
|
||||
else: # not using EBS.
|
||||
# Using the "instance store" that comes with the instance.
|
||||
# If the instance store comes with more than one volume,
|
||||
# this only mounts ONE of them: /dev/xvdb
|
||||
# For example, m3.2xlarge instances have /dev/xvdb and /dev/xvdc
|
||||
# and /mnt is mounted on /dev/xvdb by default.
|
||||
try:
|
||||
sudo("umount /mnt")
|
||||
sudo("mkfs -t ext4 /dev/xvdb")
|
||||
sudo("mount /dev/xvdb /data")
|
||||
except:
|
||||
pass
|
||||
|
||||
# persist settings to fstab
|
||||
sudo("rm -rf /etc/fstab")
|
||||
sudo("echo 'LABEL=cloudimg-rootfs / ext4 defaults,discard 0 0' >> /etc/fstab")
|
||||
sudo("echo '/dev/xvdb /data ext4 defaults,noatime 0 0' >> /etc/fstab")
|
||||
# activate deadline scheduler
|
||||
# Set the I/O scheduler for /dev/xvdb to deadline
|
||||
with settings(sudo_user='root'):
|
||||
sudo("echo deadline > /sys/block/xvdb/queue/scheduler")
|
||||
# install rethinkdb
|
||||
sudo("echo 'deb http://download.rethinkdb.com/apt trusty main' | sudo tee /etc/apt/sources.list.d/rethinkdb.list")
|
||||
sudo("wget -qO- http://download.rethinkdb.com/apt/pubkey.gpg | sudo apt-key add -")
|
||||
sudo("apt-get update")
|
||||
sudo("apt-get -y install rethinkdb")
|
||||
# change fs to user
|
||||
sudo('chown -R rethinkdb:rethinkdb /data')
|
||||
# copy config file to target system
|
||||
put('conf/rethinkdb.conf',
|
||||
'/etc/rethinkdb/instances.d/instance1.conf',
|
||||
mode=0600,
|
||||
use_sudo=True)
|
||||
# initialize data-dir
|
||||
sudo('rm -rf /data/*')
|
||||
# finally restart instance
|
||||
sudo('/etc/init.d/rethinkdb restart')
|
||||
|
||||
|
||||
# Install RethinkDB
|
||||
@task
@parallel
def install_rethinkdb():
    """Install RethinkDB on the remote host via apt.

    The commands below are run with sudo, one after another, in the
    order listed: register the RethinkDB apt source, add its signing
    key, refresh the package index, install the package, and then give
    the /data directory to the rethinkdb user and group.
    """
    install_steps = (
        # Register the RethinkDB apt repository
        "echo 'deb http://download.rethinkdb.com/apt trusty main' | sudo tee /etc/apt/sources.list.d/rethinkdb.list",
        # Add the repository's public key to apt
        "wget -qO- http://download.rethinkdb.com/apt/pubkey.gpg | sudo apt-key add -",
        # Refresh the package index, then install the package
        "apt-get update",
        "apt-get -y install rethinkdb",
        # Change owner:group of the RethinkDB data directory
        # to rethinkdb:rethinkdb
        'chown -R rethinkdb:rethinkdb /data',
    )
    for step in install_steps:
        sudo(step)
|
||||
|
||||
|
||||
# Configure RethinkDB
|
||||
@task
@parallel
def configure_rethinkdb():
    """Copy the RethinkDB config file to the remote host.

    Uploads the local file conf/rethinkdb.conf to
    /etc/rethinkdb/instances.d/instance1.conf (using sudo) and sets its
    permission bits to 0600.
    """
    # Spell the mode as 0o600: that octal form is accepted by both
    # Python 2.6+ and Python 3, whereas a bare 0600 is a syntax error
    # on Python 3. The numeric value is unchanged.
    put('conf/rethinkdb.conf',
        '/etc/rethinkdb/instances.d/instance1.conf',
        mode=0o600,
        use_sudo=True)
|
||||
|
||||
|
||||
# Delete RethinkDB data
|
||||
@task
@parallel
def delete_rethinkdb_data():
    """Remove what is inside the RethinkDB /data directory.

    The /data directory itself is left in place; only the entries
    matched by /data/* are deleted (with sudo).
    """
    wipe_command = 'rm -rf /data/*'
    sudo(wipe_command)
|
||||
|
||||
|
||||
# Start RethinkDB
|
||||
@task
@parallel
def start_rethinkdb():
    """Start RethinkDB by issuing a restart to its init.d script."""
    restart_command = '/etc/init.d/rethinkdb restart'
    sudo(restart_command)
|
||||
|
||||
|
||||
# Install BigchainDB from PyPI
|
||||
@ -197,13 +246,20 @@ def init_bigchaindb():
|
||||
run('bigchaindb init', pty=False)
|
||||
|
||||
|
||||
# Set the number of shards (in the backlog and bigchain tables)
|
||||
# Set the number of shards (in all tables)
|
||||
@task
@hosts(public_dns_names[0])
def set_shards(num_shards):
    """Run `bigchaindb set-shards` with the given number of shards.

    The command is executed on one host only: the first entry of
    public_dns_names.

    num_shards is substituted into the command line as-is.
    """
    command = 'bigchaindb set-shards {}'.format(num_shards)
    run(command)
|
||||
|
||||
|
||||
# Set the number of replicas (in all tables)
|
||||
@task
@hosts(public_dns_names[0])
def set_replicas(num_replicas):
    """Run `bigchaindb set-replicas` with the given number of replicas.

    The command is executed on one host only: the first entry of
    public_dns_names.

    num_replicas is substituted into the command line as-is.
    """
    command = 'bigchaindb set-replicas {}'.format(num_replicas)
    run(command)
|
||||
|
||||
|
||||
# Start BigchainDB using screen
|
||||
@task
|
||||
@parallel
|
||||
|
@ -24,7 +24,8 @@ from awscommon import get_naeips
|
||||
|
||||
|
||||
SETTINGS = ['NUM_NODES', 'BRANCH', 'WHAT_TO_DEPLOY', 'USE_KEYPAIRS_FILE',
|
||||
'IMAGE_ID', 'INSTANCE_TYPE']
|
||||
'IMAGE_ID', 'INSTANCE_TYPE', 'USING_EBS', 'EBS_VOLUME_SIZE',
|
||||
'EBS_OPTIMIZED']
|
||||
|
||||
|
||||
class SettingsTypeError(TypeError):
|
||||
@ -76,7 +77,7 @@ if not isinstance(WHAT_TO_DEPLOY, str):
|
||||
raise SettingsTypeError('WHAT_TO_DEPLOY should be a string')
|
||||
|
||||
if not isinstance(USE_KEYPAIRS_FILE, bool):
|
||||
msg = 'USE_KEYPAIRS_FILE should a boolean (True or False)'
|
||||
msg = 'USE_KEYPAIRS_FILE should be a boolean (True or False)'
|
||||
raise SettingsTypeError(msg)
|
||||
|
||||
if not isinstance(IMAGE_ID, str):
|
||||
@ -85,6 +86,15 @@ if not isinstance(IMAGE_ID, str):
|
||||
if not isinstance(INSTANCE_TYPE, str):
|
||||
raise SettingsTypeError('INSTANCE_TYPE should be a string')
|
||||
|
||||
if not isinstance(USING_EBS, bool):
|
||||
raise SettingsTypeError('USING_EBS should be a boolean (True or False)')
|
||||
|
||||
if not isinstance(EBS_VOLUME_SIZE, int):
|
||||
raise SettingsTypeError('EBS_VOLUME_SIZE should be an int')
|
||||
|
||||
if not isinstance(EBS_OPTIMIZED, bool):
|
||||
raise SettingsTypeError('EBS_OPTIMIZED should be a boolean (True or False)')
|
||||
|
||||
if NUM_NODES > 64:
|
||||
raise ValueError('NUM_NODES should be less than or equal to 64. '
|
||||
'The AWS deployment configuration file sets it to {}'.
|
||||
@ -95,6 +105,12 @@ if WHAT_TO_DEPLOY not in ['servers', 'clients']:
|
||||
'The AWS deployment configuration file sets it to {}'.
|
||||
format(WHAT_TO_DEPLOY))
|
||||
|
||||
# Since we assume 'gp2' volumes (for now), the possible range is 1 to 16384
# (GiB). Enforce both ends of that range, so that a zero or negative size
# is rejected up front along with a too-large one.
if not 1 <= EBS_VOLUME_SIZE <= 16384:
    raise ValueError('EBS_VOLUME_SIZE should be >= 1 and <= 16384. '
                     'The AWS deployment configuration file sets it to {}'.
                     format(EBS_VOLUME_SIZE))
|
||||
|
||||
# Get an AWS EC2 "resource"
|
||||
# See http://boto3.readthedocs.org/en/latest/guide/resources.html
|
||||
ec2 = boto3.resource(service_name='ec2')
|
||||
@ -158,14 +174,40 @@ print('Commencing launch of {} instances on Amazon EC2...'.
|
||||
for _ in range(NUM_NODES):
|
||||
# Request the launch of one instance at a time
|
||||
# (so list_of_instances should contain only one item)
|
||||
list_of_instances = ec2.create_instances(
|
||||
ImageId=IMAGE_ID,
|
||||
MinCount=1,
|
||||
MaxCount=1,
|
||||
KeyName='bigchaindb',
|
||||
InstanceType=INSTANCE_TYPE,
|
||||
SecurityGroupIds=['bigchaindb']
|
||||
)
|
||||
# See https://tinyurl.com/hbjewbb
|
||||
if USING_EBS:
|
||||
dm = {
|
||||
'DeviceName': '/dev/sdp',
|
||||
# Why /dev/sdp? See https://tinyurl.com/z2zqm6n
|
||||
'Ebs': {
|
||||
'VolumeSize': EBS_VOLUME_SIZE, # GiB
|
||||
'DeleteOnTermination': False,
|
||||
'VolumeType': 'gp2',
|
||||
'Encrypted': False
|
||||
},
|
||||
# 'NoDevice': 'device'
|
||||
# Suppresses the specified device included
|
||||
# in the block device mapping of the AMI.
|
||||
}
|
||||
list_of_instances = ec2.create_instances(
|
||||
ImageId=IMAGE_ID,
|
||||
MinCount=1,
|
||||
MaxCount=1,
|
||||
KeyName='bigchaindb',
|
||||
InstanceType=INSTANCE_TYPE,
|
||||
SecurityGroupIds=['bigchaindb'],
|
||||
BlockDeviceMappings=[dm],
|
||||
EbsOptimized=EBS_OPTIMIZED
|
||||
)
|
||||
else: # not USING_EBS
|
||||
list_of_instances = ec2.create_instances(
|
||||
ImageId=IMAGE_ID,
|
||||
MinCount=1,
|
||||
MaxCount=1,
|
||||
KeyName='bigchaindb',
|
||||
InstanceType=INSTANCE_TYPE,
|
||||
SecurityGroupIds=['bigchaindb']
|
||||
)
|
||||
|
||||
# Tag the just-launched instances (should be just one)
|
||||
for instance in list_of_instances:
|
||||
|
@ -1,32 +1,19 @@
|
||||
# Example RethinkDB Storage Setups
|
||||
|
||||
## Example 1: A Partition of an AWS Instance Store
|
||||
## Example Amazon EC2 Setups
|
||||
|
||||
Many [AWS EC2 instance types](https://aws.amazon.com/ec2/instance-types/) come with an [instance store](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/InstanceStorage.html): temporary storage that disappears when the instance disappears. The size and setup of an instance store depends on the EC2 instance type.
|
||||
We have some scripts for [deploying a _test_ BigchainDB cluster on AWS](../clusters-feds/deploy-on-aws.html). Those scripts include command sequences to set up storage for RethinkDB.
|
||||
In particular, look in the file [/deploy-cluster-aws/fabfile.py](https://github.com/bigchaindb/bigchaindb/blob/master/deploy-cluster-aws/fabfile.py), under `def prep_rethinkdb_storage(USING_EBS)`. Note that there are two cases:
|
||||
|
||||
We have some scripts for [deploying a _test_ BigchainDB cluster on AWS](../clusters-feds/deploy-on-aws.html). Those scripts include commands to set up a partition (`/dev/xvdb`) on an instance store for RethinkDB data. Those commands can be found in the file `/deploy-cluster-aws/fabfile.py`, under `def install_rethinkdb()` (i.e. the Fabric function to install RethinkDB).
|
||||
1. **Using EBS ([Amazon Elastic Block Store](https://aws.amazon.com/ebs/)).** This is always an option, and for some instance types ("EBS-only"), it's the only option.
|
||||
2. **Using an "instance store" volume provided with an Amazon EC2 instance.** Note that our scripts only use one of the (possibly many) volumes in the instance store.
|
||||
|
||||
An AWS instance store is convenient, but it's intended for "buffers, caches, scratch data, and other temporary content." Moreover:
|
||||
There's some explanation of the steps in the [Amazon EC2 documentation about making an Amazon EBS volume available for use](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-using-volumes.html).
|
||||
|
||||
* You pay for all the storage, regardless of how much you use.
|
||||
* You can't increase the size of the instance store.
|
||||
* If the instance stops, terminates, or reboots, you lose the associated instance store.
|
||||
* Instance store data isn't replicated, so if the underlying disk drive fails, you lose the data in the instance store.
|
||||
* "You can't detach an instance store volume from one instance and attach it to a different instance."
|
||||
|
||||
The [AWS documentation says](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/InstanceStorage.html), "...do not rely on instance store for valuable, long-term data. Instead, you can build a degree of redundancy (for example, RAID 1/5/6), or use a file system (for example, HDFS and MapR-FS) that supports redundancy and fault tolerance."
|
||||
|
||||
**Even if you don't use an AWS instance store partition to store your node's RethinkDB data, you may find it useful to read the steps in `def install_rethinkdb()`: [see fabfile.py](https://github.com/bigchaindb/bigchaindb/blob/master/deploy-cluster-aws/fabfile.py).**
|
||||
You shouldn't use an EC2 "instance store" to store RethinkDB data for a production node, because it's not replicated and it's only intended for temporary, ephemeral data. If the associated instance crashes, is stopped, or is terminated, the data in the instance store is lost forever. Amazon EBS storage is replicated, has incremental snapshots, and is low-latency.
|
||||
|
||||
|
||||
## Example 2: An Amazon EBS Volume
|
||||
|
||||
TODO
|
||||
|
||||
Note: Amazon EBS volumes are always replicated.
|
||||
|
||||
|
||||
## Example 3: Using Amazon EFS
|
||||
## Example Using Amazon EFS
|
||||
|
||||
TODO
|
||||
|
||||
|
@ -103,6 +103,9 @@ WHAT_TO_DEPLOY="servers"
|
||||
USE_KEYPAIRS_FILE=False
|
||||
IMAGE_ID="ami-accff2b1"
|
||||
INSTANCE_TYPE="m3.2xlarge"
|
||||
USING_EBS=False
|
||||
EBS_VOLUME_SIZE=30
|
||||
EBS_OPTIMIZED=False
|
||||
```
|
||||
|
||||
If you're happy with those settings, then you can skip to the next step. Otherwise, you could make a copy of `example_deploy_conf.py` (e.g. `cp example_deploy_conf.py my_deploy_conf.py`) and then edit the copy using a text editor.
|
||||
@ -126,6 +129,8 @@ Step 3 is to launch the nodes ("instances") on AWS, to install all the necessary
|
||||
cd bigchaindb
|
||||
cd deploy-cluster-aws
|
||||
./awsdeploy.sh my_deploy_conf.py
|
||||
# Only if you want to set the replication factor to 3
|
||||
fab set_replicas:3
|
||||
# Only if you want to start BigchainDB on all the nodes:
|
||||
fab start_bigchaindb
|
||||
```
|
||||
|
Loading…
x
Reference in New Issue
Block a user