From 3e7c2dff96f64d632caac8ba33f897d6584cc97b Mon Sep 17 00:00:00 2001 From: Brian Waldon Date: Fri, 17 Jan 2014 16:15:54 -0800 Subject: [PATCH] feat(metrics): Add documentation and contrib scripts --- Documentation/debugging.md | 69 ++ Documentation/profiling.md | 28 - contrib/collectd/Dockerfile | 9 + contrib/collectd/README | 20 + contrib/collectd/collectd-wrapper | 16 + contrib/collectd/collectd.conf | 898 ++++++++++++++++++++++++++ contrib/graphite/Dockerfile | 31 + contrib/graphite/README | 7 + contrib/graphite/carbon.conf | 62 ++ contrib/graphite/initial_data.json | 20 + contrib/graphite/local_settings.py | 1 + contrib/graphite/nginx.conf | 69 ++ contrib/graphite/storage-schemas.conf | 7 + contrib/graphite/supervisord.conf | 25 + 14 files changed, 1234 insertions(+), 28 deletions(-) create mode 100644 Documentation/debugging.md delete mode 100644 Documentation/profiling.md create mode 100644 contrib/collectd/Dockerfile create mode 100644 contrib/collectd/README create mode 100755 contrib/collectd/collectd-wrapper create mode 100644 contrib/collectd/collectd.conf create mode 100644 contrib/graphite/Dockerfile create mode 100644 contrib/graphite/README create mode 100644 contrib/graphite/carbon.conf create mode 100644 contrib/graphite/initial_data.json create mode 100644 contrib/graphite/local_settings.py create mode 100644 contrib/graphite/nginx.conf create mode 100644 contrib/graphite/storage-schemas.conf create mode 100644 contrib/graphite/supervisord.conf diff --git a/Documentation/debugging.md b/Documentation/debugging.md new file mode 100644 index 000000000..86561a702 --- /dev/null +++ b/Documentation/debugging.md @@ -0,0 +1,69 @@ +# Debugging etcd + +Diagnosing issues in a distributed application is hard. +etcd will help as much as it can - just enable these debug features using the CLI flag `-trace=*` or the config option `trace=*`. 
+ +## Logging + +Log verbosity can be increased to the max using either the `-vvv` CLI flag or the `very_very_verbose=true` config option. + +The only supported logging mode is to stdout. + +## Metrics + +etcd itself can generate a set of metrics. +These metrics represent many different internal data points that can be helpful when debugging etcd servers. + +#### Metrics reference + +Each individual metric name is prefixed with `etcd.<name>`, where \<name\> is the configured name of the etcd server. + +* `timer.appendentries.handle`: amount of time a peer takes to process an AppendEntriesRequest from the POV of the peer itself +* `timer.peer.<name>.heartbeat`: amount of time a peer heartbeat operation takes from the POV of the leader that initiated that operation for peer \<name\> +* `timer.command.<name>`: amount of time a given command took to be processed through the local server's raft state machine. This does not include time waiting on locks. + +#### Fetching metrics over HTTP + +Once tracing has been enabled on a given etcd server, all metric data is available at the server's `/debug/metrics` HTTP endpoint (e.g. `http://127.0.0.1:4001/debug/metrics`). +Issuing an HTTP GET request against the metrics endpoint will yield the current state of all metrics in the etcd server. + +#### Sending metrics to Graphite + +etcd supports [Graphite's Carbon plaintext protocol](https://graphite.readthedocs.org/en/latest/feeding-carbon.html#the-plaintext-protocol) - a TCP wire protocol designed for shipping metric data to an aggregator. +To send metrics to a Graphite endpoint using this protocol, use the `-graphite-host` CLI flag or the `graphite_host` config option (e.g. `graphite_host=172.17.0.19:2003`). + +See an [example graphite deploy script](https://github.com/coreos/etcd/tree/master/contrib/graphite). + +#### Generating additional metrics with Collectd + +[Collectd](http://collectd.org/documentation.shtml) gathers metrics from the host running etcd.
+While these aren't metrics generated by etcd itself, it can be invaluable to compare etcd's view of the world to that of a separate process running next to etcd. + +See an [example collectd deploy script](https://github.com/coreos/etcd/tree/master/contrib/collectd). + +## Profiling + +etcd exposes profiling information from the Go pprof package over HTTP. +The basic browseable interface is served by etcd at the `/debug/pprof` HTTP endpoint (e.g. `http://127.0.0.1:4001/debug/pprof`). +For more information on using profiling tools, see http://blog.golang.org/profiling-go-programs. + +**NOTE**: In the following examples you need to ensure that the local `./bin/etcd` binary is identical to the etcd binary that you are targeting (same git hash, arch, platform, etc). + +#### Heap memory profile + +``` +go tool pprof ./bin/etcd http://127.0.0.1:4001/debug/pprof/heap +``` + +#### CPU profile + +``` +go tool pprof ./bin/etcd http://127.0.0.1:4001/debug/pprof/profile +``` + +#### Blocked goroutine profile + +``` +go tool pprof ./bin/etcd http://127.0.0.1:4001/debug/pprof/block +``` + diff --git a/Documentation/profiling.md b/Documentation/profiling.md deleted file mode 100644 index dd63529de..000000000 --- a/Documentation/profiling.md +++ /dev/null @@ -1,28 +0,0 @@ -## Profiling - -etcd exposes profiling information from the Go pprof package over HTTP. -The basic browseable interface can be found at `http://127.0.0.1:4001/debug/pprof`. - -**NOTE**: In the following examples you need to ensure that the `./bin/etcd` is -identical to the `./bin/etcd` that you are targetting (same git hash, arch, -platform, etc).
- -### Heap memory profile - -``` -go tool pprof ./bin/etcd http://127.0.0.1:4001/debug/pprof/heap -``` - -### CPU profile - -``` -go tool pprof ./bin/etcd http://127.0.0.1:4001/debug/pprof/profile -``` - -### Blocked goroutine profile - -``` -go tool pprof ./bin/etcd http://127.0.0.1:4001/debug/pprof/block -``` - -For more information on using the tools see http://blog.golang.org/profiling-go-programs diff --git a/contrib/collectd/Dockerfile b/contrib/collectd/Dockerfile new file mode 100644 index 000000000..b377c4d63 --- /dev/null +++ b/contrib/collectd/Dockerfile @@ -0,0 +1,9 @@ +FROM stackbrew/ubuntu:raring + +RUN apt-get update && apt-get install -y collectd +RUN adduser --system --group --no-create-home collectd +ADD collectd.conf /etc/collectd/collectd.conf.tmpl +ADD collectd-wrapper /bin/collectd-wrapper +RUN chown -R collectd:collectd /etc/collectd + +CMD ["collectd-wrapper"] diff --git a/contrib/collectd/README b/contrib/collectd/README new file mode 100644 index 000000000..61b879ffa --- /dev/null +++ b/contrib/collectd/README @@ -0,0 +1,20 @@ +We're going to use Docker to build a chroot env that can be run with systemd-nspawn since I cannot figure out how to run +a container using docker in the global network namespace. + +1. Build the collectd image using docker +docker build -t collectd . + +2. Run the container (since we have to run it to export it...) +COLLECTD_CONTAINER=`docker run -name collectd-tmp -d collectd` + +3. Export the container's filesystem to a tar archive +docker export collectd-tmp > /tmp/collectd.tar + +4. Kill the temporary container +docker kill $COLLECTD_CONTAINER + +5. Unpack the tar archive +mkdir -p /tmp/collectd && tar -xvf /tmp/collectd.tar -C /tmp/collectd/ + +6. Run collectd with systemd-nspawn - replace the COLLECTD_* env vars with your parameters!
+sudo systemd-run --unit collectd systemd-nspawn -D /tmp/collectd /bin/bash -c "COLLECTD_GRAPHITE_HOSTNAME=172.31.13.241 COLLECTD_LOCAL_HOSTNAME=node1 /bin/collectd-wrapper" diff --git a/contrib/collectd/collectd-wrapper b/contrib/collectd/collectd-wrapper new file mode 100755 index 000000000..fe023b12b --- /dev/null +++ b/contrib/collectd/collectd-wrapper @@ -0,0 +1,16 @@ +#!/bin/bash +# Render collectd.conf from the template plus settings taken from the environment, then run collectd in the foreground. +cat /etc/collectd/collectd.conf.tmpl > /etc/collectd/collectd.conf +# Append the local hostname (COLLECTD_LOCAL_HOSTNAME) and the Graphite target (COLLECTD_GRAPHITE_HOSTNAME, port 2003). +cat << EOF >> /etc/collectd/collectd.conf +Hostname "${COLLECTD_LOCAL_HOSTNAME}" + +<Plugin write_graphite> + <Carbon> + Host "${COLLECTD_GRAPHITE_HOSTNAME}" + Port "2003" + </Carbon> +</Plugin> +EOF +# exec replaces the shell with collectd so that signals are delivered to collectd itself. +exec collectd -C /etc/collectd/collectd.conf -f diff --git a/contrib/collectd/collectd.conf b/contrib/collectd/collectd.conf new file mode 100644 index 000000000..dc5c319a4 --- /dev/null +++ b/contrib/collectd/collectd.conf @@ -0,0 +1,898 @@ +# Config file for collectd(1). +# +# Some plugins need additional configuration and are disabled by default. +# Please read collectd.conf(5) for details. +# +# You should also read /usr/share/doc/collectd-core/README.Debian.plugins +# before enabling any more plugins.
+ +#Hostname "localhost" +#FQDNLookup true +#BaseDir "/var/lib/collectd" +#PluginDir "/usr/lib/collectd" +#TypesDB "/usr/share/collectd/types.db" "/etc/collectd/my_types.db" +#Interval 10 +#Timeout 2 +#ReadThreads 5 + +LoadPlugin logfile +#LoadPlugin syslog + +<Plugin logfile> + LogLevel "info" + File STDOUT + Timestamp true + PrintSeverity false +</Plugin> + +#<Plugin syslog> +# LogLevel info +#</Plugin> + +#LoadPlugin amqp +#LoadPlugin apache +#LoadPlugin apcups +#LoadPlugin ascent +#LoadPlugin battery +#LoadPlugin bind +#LoadPlugin conntrack +#LoadPlugin contextswitch +LoadPlugin cpu +#LoadPlugin cpufreq +#LoadPlugin csv +#LoadPlugin curl +#LoadPlugin curl_json +#LoadPlugin curl_xml +#LoadPlugin dbi +LoadPlugin df +#LoadPlugin disk +#LoadPlugin dns +#LoadPlugin email +#LoadPlugin entropy +#LoadPlugin ethstat +#LoadPlugin exec +#LoadPlugin filecount +#LoadPlugin fscache +#LoadPlugin gmond +#LoadPlugin hddtemp +#LoadPlugin interface +#LoadPlugin ipmi +#LoadPlugin iptables +#LoadPlugin ipvs +#LoadPlugin irq +#LoadPlugin java +#LoadPlugin libvirt +#LoadPlugin load +#LoadPlugin madwifi +#LoadPlugin mbmon +#LoadPlugin md +#LoadPlugin memcachec +#LoadPlugin memcached +LoadPlugin memory +#LoadPlugin multimeter +#LoadPlugin mysql +#LoadPlugin netlink +#LoadPlugin network +#LoadPlugin nfs +#LoadPlugin nginx +#LoadPlugin notify_desktop +#LoadPlugin notify_email +#LoadPlugin ntpd +#LoadPlugin numa +#LoadPlugin nut +#LoadPlugin olsrd +#LoadPlugin openvpn +#<LoadPlugin perl> +# Globals true +#</LoadPlugin> +#LoadPlugin pinba +#LoadPlugin ping +#LoadPlugin postgresql +#LoadPlugin powerdns +#LoadPlugin processes +#LoadPlugin protocols +#<LoadPlugin python> +# Globals true +#</LoadPlugin> +#LoadPlugin rrdcached +#LoadPlugin rrdtool +#LoadPlugin sensors +#LoadPlugin serial +#LoadPlugin snmp +#LoadPlugin swap +#LoadPlugin table +#LoadPlugin tail +LoadPlugin tcpconns +#LoadPlugin teamspeak2 +#LoadPlugin ted +#LoadPlugin thermal +#LoadPlugin tokyotyrant +#LoadPlugin unixsock +#LoadPlugin uptime +#LoadPlugin users +#LoadPlugin uuid +#LoadPlugin varnish +#LoadPlugin vmem +#LoadPlugin vserver
+#LoadPlugin wireless +LoadPlugin write_graphite +#LoadPlugin write_http +#LoadPlugin write_mongodb + +# +# +# Host "localhost" +# Port "5672" +# VHost "/" +# User "guest" +# Password "guest" +# Exchange "amq.fanout" +# RoutingKey "collectd" +# Persistent false +# StoreRates false +# +# + +# +# +# URL "http://localhost/server-status?auto" +# User "www-user" +# Password "secret" +# VerifyPeer false +# VerifyHost false +# CACert "/etc/ssl/ca.crt" +# Server "apache" +# +# +# +# URL "http://some.domain.tld/status?auto" +# Host "some.domain.tld" +# Server "lighttpd" +# +# + +# +# Host "localhost" +# Port "3551" +# + +# +# URL "http://localhost/ascent/status/" +# User "www-user" +# Password "secret" +# VerifyPeer false +# VerifyHost false +# CACert "/etc/ssl/ca.crt" +# + +# +# URL "http://localhost:8053/" +# +# ParseTime false +# +# OpCodes true +# QTypes true +# ServerStats true +# ZoneMaintStats true +# ResolverStats false +# MemoryStats true +# +# +# QTypes true +# ResolverStats true +# CacheRRSets true +# +# Zone "127.in-addr.arpa/IN" +# +# + +# +# DataDir "/var/lib/collectd/csv" +# StoreRates false +# + +# +# +# URL "http://finance.google.com/finance?q=NYSE%3AAMD" +# User "foo" +# Password "bar" +# VerifyPeer false +# VerifyHost false +# CACert "/etc/ssl/ca.crt" +# MeasureResponseTime false +# +# Regex "]*> *([0-9]*\\.[0-9]+) *" +# DSType "GaugeAverage" +# Type "stock_value" +# Instance "AMD" +# +# +# + +# +## See: http://wiki.apache.org/couchdb/Runtime_Statistics +# +# Instance "httpd" +# +# Type "http_requests" +# +# +# +# Type "http_request_methods" +# +# +# +# Type "http_response_codes" +# +# +## Database status metrics: +# +# Instance "dbs" +# +# Type "gauge" +# +# +# Type "counter" +# +# +# Type "bytes" +# +# +# + +# +# +# Host "my_host" +# Instance "some_instance" +# User "collectd" +# Password "thaiNg0I" +# VerifyPeer true +# VerifyHost true +# CACert "/path/to/ca.crt" +# +# +# Type "magic_level" +# InstancePrefix "prefix-" +# InstanceFrom "td[1]" +# 
ValuesFrom "td[2]/span[@class=\"level\"]" +# +# +# + +# +# +# Statement "SELECT 'customers' AS c_key, COUNT(*) AS c_value \ +# FROM customers_tbl" +# MinVersion 40102 +# MaxVersion 50042 +# +# Type "gauge" +# InstancePrefix "customer" +# InstancesFrom "c_key" +# ValuesFrom "c_value" +# +# +# +# +# Driver "mysql" +# DriverOption "host" "localhost" +# DriverOption "username" "collectd" +# DriverOption "password" "secret" +# DriverOption "dbname" "custdb0" +# SelectDB "custdb0" +# Query "num_of_customers" +# Query "..." +# +# + +# +# Device "/dev/sda1" +# Device "192.168.0.2:/mnt/nfs" +# MountPoint "/home" +# FSType "ext3" +# IgnoreSelected false +# ReportByDevice false +# ReportReserved false +# ReportInodes false +# + +# +# Disk "hda" +# Disk "/sda[23]/" +# IgnoreSelected false +# + +# +# Interface "eth0" +# IgnoreSource "192.168.0.1" +# SelectNumericQueryTypes false +# + +# +# SocketFile "/var/run/collectd-email" +# SocketGroup "collectd" +# SocketPerms "0770" +# MaxConns 5 +# + +# +# Interface "eth0" +# Map "rx_csum_offload_errors" "if_rx_errors" "checksum_offload" +# Map "multicast" "if_multicast" +# MappedOnly false +# + +# +# Exec user "/path/to/exec" +# Exec "user:group" "/path/to/exec" +# NotificationExec user "/path/to/exec" +# + +# +# +# Instance "foodir" +# Name "*.conf" +# MTime "-5m" +# Size "+10k" +# Recursive true +# IncludeHidden false +# +# + +# +# MCReceiveFrom "239.2.11.71" "8649" +# +# +# Type "swap" +# TypeInstance "total" +# DataSource "value" +# +# +# +# Type "swap" +# TypeInstance "free" +# DataSource "value" +# +# + +# +# Host "127.0.0.1" +# Port 7634 +# + +# +# Interface "eth0" +# IgnoreSelected false +# + +# +# Sensor "some_sensor" +# Sensor "another_one" +# IgnoreSelected false +# NotifySensorAdd false +# NotifySensorRemove true +# NotifySensorNotPresent false +# + +# +# Chain "table" "chain" +# + +# +# Irq 7 +# Irq 8 +# Irq 9 +# IgnoreSelected true +# + +# +# JVMArg "-verbose:jni" +# JVMArg 
"-Djava.class.path=/usr/share/collectd/java/collectd-api.jar" +# +# LoadPlugin "org.collectd.java.GenericJMX" +# +# # See /usr/share/doc/collectd/examples/GenericJMX.conf +# # for an example config. +# +# + +# +# Connection "xen:///" +# RefreshInterval 60 +# Domain "name" +# BlockDevice "name:device" +# InterfaceDevice "name:device" +# IgnoreSelected false +# HostnameFormat name +# InterfaceFormat name +# + +# +# Interface "wlan0" +# IgnoreSelected false +# Source "SysFS" +# WatchSet "None" +# WatchAdd "node_octets" +# WatchAdd "node_rssi" +# WatchAdd "is_rx_acl" +# WatchAdd "is_scan_active" +# + +# +# Host "127.0.0.1" +# Port 411 +# + +# +# Device "/dev/md0" +# IgnoreSelected false +# + +# +# +# Server "localhost" +# Key "page_key" +# +# Regex "(\\d+) bytes sent" +# ExcludeRegex "" +# DSType CounterAdd +# Type "ipt_octets" +# Instance "type_instance" +# +# +# + +# +# Socket "/var/run/memcached.sock" +# or: +# Host "127.0.0.1" +# Port "11211" +# + +# +# +# Host "database.serv.er" +# Port "3306" +# User "db_user" +# Password "secret" +# Database "db_name" +# MasterStats true +# +# +# +# Host "localhost" +# Socket "/var/run/mysql/mysqld.sock" +# SlaveStats true +# SlaveNotifications true +# +# + +# +# Interface "All" +# VerboseInterface "All" +# QDisc "eth0" "pfifo_fast-1:0" +# Class "ppp0" "htb-1:10" +# Filter "ppp0" "u32-1:0" +# IgnoreSelected false +# + +# +# # client setup: +# Server "ff18::efc0:4a42" "25826" +# +# SecurityLevel Encrypt +# Username "user" +# Password "secret" +# Interface "eth0" +# +# TimeToLive "128" +# +# # server setup: +# Listen "0.0.0.0" "25826" +# +# SecurityLevel Sign +# AuthFile "/etc/collectd/passwd" +# Interface "eth0" +# +# MaxPacketSize 1024 +# +# # proxy setup (client and server as above): +# Forward true +# +# # statistics about the network plugin itself +# ReportStats false +# +# # "garbage collection" +# CacheFlush 1800 +# + +# +# URL "http://localhost/status?auto" +# User "www-user" +# Password "secret" +# VerifyPeer false +# 
VerifyHost false +# CACert "/etc/ssl/ca.crt" +# + +# +# OkayTimeout 1000 +# WarningTimeout 5000 +# FailureTimeout 0 +# + +# +# SMTPServer "localhost" +# SMTPPort 25 +# SMTPUser "my-username" +# SMTPPassword "my-password" +# From "collectd@main0server.com" +# # on . +# # Beware! Do not use not more than two placeholders (%)! +# Subject "[collectd] %s on %s!" +# Recipient "email1@domain1.net" +# Recipient "email2@domain2.com" +# + +# +# Host "localhost" +# Port 123 +# ReverseLookups false +# + +# +# UPS "upsname@hostname:port" +# + +# +# Host "127.0.0.1" +# Port "2006" +# CollectLinks "Summary" +# CollectRoutes "Summary" +# CollectTopology "Summary" +# + +# +# StatusFile "/etc/openvpn/openvpn-status.log" +# ImprovedNamingSchema false +# CollectCompression true +# CollectIndividualUsers true +# CollectUserCount false +# + +# +# IncludeDir "/my/include/path" +# BaseName "Collectd::Plugins" +# EnableDebugger "" +# LoadPlugin Monitorus +# LoadPlugin OpenVZ +# +# +# Foo "Bar" +# Qux "Baz" +# +# + +# +# Address "::0" +# Port "30002" +# +# Host "host name" +# Server "server name" +# Script "script name" +# +# + +# +# Host "host.foo.bar" +# Host "host.baz.qux" +# Interval 1.0 +# Timeout 0.9 +# TTL 255 +# SourceAddress "1.2.3.4" +# Device "eth0" +# MaxMissed -1 +# + +# +# +# Statement "SELECT magic FROM wizard WHERE host = $1;" +# Param hostname +# +# +# Type gauge +# InstancePrefix "magic" +# ValuesFrom "magic" +# +# +# +# +# Statement "SELECT COUNT(type) AS count, type \ +# FROM (SELECT CASE \ +# WHEN resolved = 'epoch' THEN 'open' \ +# ELSE 'resolved' END AS type \ +# FROM tickets) type \ +# GROUP BY type;" +# +# +# Type counter +# InstancePrefix "rt36_tickets" +# InstancesFrom "type" +# ValuesFrom "count" +# +# +# +# +# Host "hostname" +# Port 5432 +# User "username" +# Password "secret" +# +# SSLMode "prefer" +# KRBSrvName "kerberos_service_name" +# +# Query magic +# +# +# +# Interval 60 +# Service "service_name" +# +# Query backend # predefined +# Query rt36_tickets +# 
+# + +# +# +# Collect "latency" +# Collect "udp-answers" "udp-queries" +# Socket "/var/run/pdns.controlsocket" +# +# +# Collect "questions" +# Collect "cache-hits" "cache-misses" +# Socket "/var/run/pdns_recursor.controlsocket" +# +# LocalSocket "/opt/collectd/var/run/collectd-powerdns" +# + +# +# Process "name" +# ProcessMatch "foobar" "/usr/bin/perl foobar\\.pl.*" +# + +# +# Value "/^Tcp:/" +# IgnoreSelected false +# + +# +# ModulePath "/path/to/your/python/modules" +# LogTraces true +# Interactive true +# Import "spam" +# +# +# spam "wonderful" "lovely" +# +# + +# +# DaemonAddress "unix:/var/run/rrdcached.sock" +# DataDir "/var/lib/rrdcached/db/collectd" +# CreateFiles true +# CollectStatistics true +# + + + DataDir "/var/lib/collectd/rrd" +# CacheTimeout 120 +# CacheFlush 900 +# WritesPerSecond 30 +# RandomTimeout 0 +# +# The following settings are rather advanced +# and should usually not be touched: +# StepSize 10 +# HeartBeat 20 +# RRARows 1200 +# RRATimespan 158112000 +# XFF 0.1 + + +# +# SensorConfigFile "/etc/sensors3.conf" +# Sensor "it8712-isa-0290/temperature-temp1" +# Sensor "it8712-isa-0290/fanspeed-fan3" +# Sensor "it8712-isa-0290/voltage-in8" +# IgnoreSelected false +# + +# See /usr/share/doc/collectd/examples/snmp-data.conf.gz for a +# comprehensive sample configuration. 
+# +# +# Type "voltage" +# Table false +# Instance "input_line1" +# Scale 0.1 +# Values "SNMPv2-SMI::enterprises.6050.5.4.1.1.2.1" +# +# +# Type "users" +# Table false +# Instance "" +# Shift -1 +# Values "HOST-RESOURCES-MIB::hrSystemNumUsers.0" +# +# +# Type "if_octets" +# Table true +# InstancePrefix "traffic" +# Instance "IF-MIB::ifDescr" +# Values "IF-MIB::ifInOctets" "IF-MIB::ifOutOctets" +# +# +# +# Address "192.168.0.2" +# Version 1 +# Community "community_string" +# Collect "std_traffic" +# Inverval 120 +# +# +# Address "192.168.0.42" +# Version 2 +# Community "another_string" +# Collect "std_traffic" "hr_users" +# +# +# Address "192.168.0.3" +# Version 1 +# Community "more_communities" +# Collect "powerplus_voltge_input" +# Interval 300 +# +# + +# +# ReportByDevice false +# + +# +# +# Instance "slabinfo" +# Separator " " +# +# Type gauge +# InstancePrefix "active_objs" +# InstancesFrom 0 +# ValuesFrom 1 +# +# +# Type gauge +# InstancePrefix "objperslab" +# InstancesFrom 0 +# ValuesFrom 4 +# +#
+#
+ +# +# +# Instance "exim" +# +# Regex "S=([1-9][0-9]*)" +# DSType "CounterAdd" +# Type "ipt_bytes" +# Instance "total" +# +# +# Regex "\\" +# ExcludeRegex "\\.*mail_spool defer" +# DSType "CounterInc" +# Type "counter" +# Instance "local_user" +# +# +# + +<Plugin tcpconns> + LocalPort "4001" + LocalPort "7001" +</Plugin> + +# +# Host "127.0.0.1" +# Port "51234" +# Server "8767" +# + +# +# Device "/dev/ttyUSB0" +# Retries 0 +# + +# +# ForceUseProcfs false +# Device "THRM" +# IgnoreSelected false +# + +# +# Host "localhost" +# Port "1978" +# + +# +# SocketFile "/var/run/collectd-unixsock" +# SocketGroup "collectd" +# SocketPerms "0660" +# DeleteSocket false +# + +# +# UUIDFile "/etc/uuid" +# + +# +# +# CollectCache true +# CollectBackend true +# CollectConnections true +# CollectSHM true +# CollectESI false +# CollectFetch false +# CollectHCB false +# CollectSMA false +# CollectSMS false +# CollectSM false +# CollectTotals false +# CollectWorkers false +# +# +# +# CollectCache true +# +# + +# +# Verbose false +# + +# +# +# Host "127.0.0.1" +# Port "2003" +# Prefix "collectd" +# Postfix "collectd" +# StoreRates false +# AlwaysAppendDS false +# EscapeCharacter "_" +# +# + +# +# +# User "collectd" +# Password "secret" +# VerifyPeer true +# VerifyHost true +# CACert "/etc/ssl/ca.crt" +# Format "Command" +# StoreRates false +# +# + +# +# +# Host "localhost" +# Port "27017" +# Timeout 1000 +# StoreRates false +# +# + +Include "/etc/collectd/filters.conf" +Include "/etc/collectd/thresholds.conf" diff --git a/contrib/graphite/Dockerfile b/contrib/graphite/Dockerfile new file mode 100644 index 000000000..859afc5a8 --- /dev/null +++ b/contrib/graphite/Dockerfile @@ -0,0 +1,31 @@ +from stackbrew/ubuntu:precise + +run echo 'deb http://us.archive.ubuntu.com/ubuntu/ precise universe' >> /etc/apt/sources.list +run apt-get -y update + +# Install required packages +run apt-get -y install python-cairo python-django python-twisted python-django-tagging python-simplejson python-pysqlite2 python-support
python-pip gunicorn supervisor nginx-light +run pip install whisper +run pip install --install-option="--prefix=/var/lib/graphite" --install-option="--install-lib=/var/lib/graphite/lib" carbon +run pip install --install-option="--prefix=/var/lib/graphite" --install-option="--install-lib=/var/lib/graphite/webapp" graphite-web + +# Add system service config +add ./nginx.conf /etc/nginx/nginx.conf +add ./supervisord.conf /etc/supervisor/conf.d/supervisord.conf + +# Add graphite config +add ./initial_data.json /var/lib/graphite/webapp/graphite/initial_data.json +add ./local_settings.py /var/lib/graphite/webapp/graphite/local_settings.py +add ./carbon.conf /var/lib/graphite/conf/carbon.conf +add ./storage-schemas.conf /var/lib/graphite/conf/storage-schemas.conf +run mkdir -p /var/lib/graphite/storage/whisper +run touch /var/lib/graphite/storage/graphite.db /var/lib/graphite/storage/index +run chown -R www-data /var/lib/graphite/storage +run chmod 0775 /var/lib/graphite/storage /var/lib/graphite/storage/whisper +run chmod 0664 /var/lib/graphite/storage/graphite.db +run cd /var/lib/graphite/webapp/graphite && python manage.py syncdb --noinput + +expose :80 +expose :2003 + +cmd ["/usr/bin/supervisord"] diff --git a/contrib/graphite/README b/contrib/graphite/README new file mode 100644 index 000000000..069b621f6 --- /dev/null +++ b/contrib/graphite/README @@ -0,0 +1,7 @@ +Running graphite under Docker is straightforward: + +1. Build the graphite image using Docker +docker build -t graphite . + +2. Run a graphite container. Be sure to replace the $IP field with the IP address at which you wish to expose your graphite web service. 
+docker run -p $IP:8080:80 -p $IP:2003:2003 -d graphite diff --git a/contrib/graphite/carbon.conf b/contrib/graphite/carbon.conf new file mode 100644 index 000000000..1eed4e8fa --- /dev/null +++ b/contrib/graphite/carbon.conf @@ -0,0 +1,62 @@ +[cache] +LOCAL_DATA_DIR = /var/lib/graphite/storage/whisper/ + +# Specify the user to drop privileges to +# If this is blank carbon runs as the user that invokes it +# This user must have write access to the local data directory +USER = + +# Limit the size of the cache to avoid swapping or becoming CPU bound. +# Sorts and serving cache queries gets more expensive as the cache grows. +# Use the value "inf" (infinity) for an unlimited cache size. +MAX_CACHE_SIZE = inf + +# Limits the number of whisper update_many() calls per second, which effectively +# means the number of write requests sent to the disk. This is intended to +# prevent over-utilizing the disk and thus starving the rest of the system. +# When the rate of required updates exceeds this, then carbon's caching will +# take effect and increase the overall throughput accordingly. +MAX_UPDATES_PER_SECOND = 1000 + +# Softly limits the number of whisper files that get created each minute. +# Setting this value low (like at 50) is a good way to ensure your graphite +# system will not be adversely impacted when a bunch of new metrics are +# sent to it. The trade off is that it will take much longer for those metrics' +# database files to all get created and thus longer until the data becomes usable. +# Setting this value high (like "inf" for infinity) will cause graphite to create +# the files quickly but at the risk of slowing I/O down considerably for a while. 
+MAX_CREATES_PER_MINUTE = inf + +LINE_RECEIVER_INTERFACE = 0.0.0.0 +LINE_RECEIVER_PORT = 2003 + +#PICKLE_RECEIVER_INTERFACE = 0.0.0.0 +#PICKLE_RECEIVER_PORT = 2004 + +#CACHE_QUERY_INTERFACE = 0.0.0.0 +#CACHE_QUERY_PORT = 7002 + +LOG_UPDATES = False + +# Enable AMQP if you want to receve metrics using an amqp broker +# ENABLE_AMQP = False + +# Verbose means a line will be logged for every metric received +# useful for testing +# AMQP_VERBOSE = False + +# AMQP_HOST = localhost +# AMQP_PORT = 5672 +# AMQP_VHOST = / +# AMQP_USER = guest +# AMQP_PASSWORD = guest +# AMQP_EXCHANGE = graphite + +# Patterns for all of the metrics this machine will store. Read more at +# http://en.wikipedia.org/wiki/Advanced_Message_Queuing_Protocol#Bindings +# +# Example: store all sales, linux servers, and utilization metrics +# BIND_PATTERNS = sales.#, servers.linux.#, #.utilization +# +# Example: store everything +# BIND_PATTERNS = # diff --git a/contrib/graphite/initial_data.json b/contrib/graphite/initial_data.json new file mode 100644 index 000000000..b3ac9b1eb --- /dev/null +++ b/contrib/graphite/initial_data.json @@ -0,0 +1,20 @@ +[ + { + "pk": 1, + "model": "auth.user", + "fields": { + "username": "admin", + "first_name": "", + "last_name": "", + "is_active": true, + "is_superuser": true, + "is_staff": true, + "last_login": "2011-09-20 17:02:14", + "groups": [], + "user_permissions": [], + "password": "sha1$1b11b$edeb0a67a9622f1f2cfeabf9188a711f5ac7d236", + "email": "root@example.com", + "date_joined": "2011-09-20 17:02:14" + } + } +] diff --git a/contrib/graphite/local_settings.py b/contrib/graphite/local_settings.py new file mode 100644 index 000000000..7cff8f793 --- /dev/null +++ b/contrib/graphite/local_settings.py @@ -0,0 +1 @@ +TIME_ZONE = 'UTC' diff --git a/contrib/graphite/nginx.conf b/contrib/graphite/nginx.conf new file mode 100644 index 000000000..370b4a7f8 --- /dev/null +++ b/contrib/graphite/nginx.conf @@ -0,0 +1,69 @@ +daemon off; +user www-data; +worker_processes 1; 
+pid /var/run/nginx.pid; + +events { + worker_connections 1024; +} + +http { + sendfile on; + tcp_nopush on; + tcp_nodelay on; + keepalive_timeout 65; + types_hash_max_size 2048; + server_tokens off; + + server_names_hash_bucket_size 32; + + include /etc/nginx/mime.types; + default_type application/octet-stream; + + access_log /var/log/nginx/access.log; + error_log /var/log/nginx/error.log; + + gzip on; + gzip_disable "msie6"; + + server { + listen 80 default_server; + server_name _; + + open_log_file_cache max=1000 inactive=20s min_uses=2 valid=1m; + + location / { + proxy_pass http://127.0.0.1:8000; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header X-Forwarded-Server $host; + proxy_set_header X-Forwarded-Host $host; + proxy_set_header Host $host; + + client_max_body_size 10m; + client_body_buffer_size 128k; + + proxy_connect_timeout 90; + proxy_send_timeout 90; + proxy_read_timeout 90; + + proxy_buffer_size 4k; + proxy_buffers 4 32k; + proxy_busy_buffers_size 64k; + proxy_temp_file_write_size 64k; + } + + add_header Access-Control-Allow-Origin "*"; + add_header Access-Control-Allow-Methods "GET, OPTIONS"; + add_header Access-Control-Allow-Headers "origin, authorization, accept"; + + location /content { + alias /var/lib/graphite/webapp/content; + } + + location /media { + alias /usr/share/pyshared/django/contrib/admin/media; + } + } +} diff --git a/contrib/graphite/storage-schemas.conf b/contrib/graphite/storage-schemas.conf new file mode 100644 index 000000000..855a9e4eb --- /dev/null +++ b/contrib/graphite/storage-schemas.conf @@ -0,0 +1,7 @@ +[carbon] +pattern = ^carbon\..* +retentions = 1m:31d,10m:1y,1h:5y + +[default] +pattern = .* +retentions = 10s:8d,1m:31d,10m:1y,1h:5y diff --git a/contrib/graphite/supervisord.conf b/contrib/graphite/supervisord.conf new file mode 100644 index 000000000..f41541a48 --- /dev/null +++ 
b/contrib/graphite/supervisord.conf @@ -0,0 +1,25 @@ +[supervisord] +nodaemon = true +environment = GRAPHITE_STORAGE_DIR='/var/lib/graphite/storage',GRAPHITE_CONF_DIR='/var/lib/graphite/conf' + +[program:nginx] +command = /usr/sbin/nginx +stdout_logfile = /var/log/supervisor/%(program_name)s.log +stderr_logfile = /var/log/supervisor/%(program_name)s.log +autorestart = true + +[program:carbon-cache] +user = www-data +command = /var/lib/graphite/bin/carbon-cache.py --debug start +stdout_logfile = /var/log/supervisor/%(program_name)s.log +stderr_logfile = /var/log/supervisor/%(program_name)s.log +autorestart = true + +[program:graphite-webapp] +user = www-data +directory = /var/lib/graphite/webapp +environment = PYTHONPATH='/var/lib/graphite/webapp' +command = /usr/bin/gunicorn_django -b127.0.0.1:8000 -w2 graphite/settings.py +stdout_logfile = /var/log/supervisor/%(program_name)s.log +stderr_logfile = /var/log/supervisor/%(program_name)s.log +autorestart = true