diff --git a/docs/server/source/cloud-deployment-templates/log-analytics.rst b/docs/server/source/cloud-deployment-templates/log-analytics.rst
index 1f5d5596..5354b4e2 100644
--- a/docs/server/source/cloud-deployment-templates/log-analytics.rst
+++ b/docs/server/source/cloud-deployment-templates/log-analytics.rst
@@ -193,30 +193,55 @@ simply run the following command:
 
    $ kubectl create -f oms-daemonset.yaml
 
-Create an Email Alert
----------------------
+Search the OMS Logs
+-------------------
 
-Suppose you want to get an email whenever there's a logging message
-with the CRITICAL or ERROR logging level from any container.
-At the time of writing, it wasn't possible to create email alerts
-using the Azure Portal (as far as we could tell),
-but it *was* possible using the OMS Portal.
-(There are instructions to get to the OMS Portal
-in the section titled :ref:`Deploy the OMS Agents` above.)
+OMS should now be getting, storing and indexing all the logs
+from all the containers in your Kubernetes cluster.
+You can search the OMS logs from the Azure Portal
+or the OMS Portal, but at the time of writing,
+there was more functionality in the OMS Portal
+(e.g. the ability to create an Alert based on a search).
+
+There are instructions to get to the OMS Portal
+in the section titled :ref:`Deploy the OMS Agents` above.
 Once you're in the OMS Portal, click on **Log Search**
-and enter the query string:
+and enter a query.
+Here are some example queries:
+
+All logging messages containing the strings "critical" or "error" (not case-sensitive):
 
 ``Type=ContainerLog (critical OR error)``
 
-If you don't see any query results,
-try experimenting with the query string and time range
-to convince yourself that it's working.
-For query syntax help, see the
-`Log Analytics search reference `_.
-If you want to exclude the "404 Not Found" errors,
-use the query string
-"Type=ContainerLog (critical OR error) NOT(404)".
-Once you're satisfied with the query string,
+.. note::
+
+   You can filter the results even more by clicking on things in the left sidebar.
+   For OMS Log Search syntax help, see the
+   `Log Analytics search reference `_.
+
+All logging messages containing the string "error" but not "404":
+
+``Type=ContainerLog error NOT(404)``
+
+All logging messages containing the string "critical" but not "CriticalAddonsOnly":
+
+``Type=ContainerLog critical NOT(CriticalAddonsOnly)``
+
+All logging messages from containers running the Docker image bigchaindb/nginx_3scale:1.3, containing the string "GET" but not the strings "Go-http-client" or "runscope" (where those exclusions filter out tests by Kubernetes and Runscope):
+
+``Type=ContainerLog Image="bigchaindb/nginx_3scale:1.3" GET NOT("Go-http-client") NOT(runscope)``
+
+.. note::
+
+   We wrote a small Python 3 script to analyze the logs found by the above NGINX search.
+   It's in ``k8s/logging-and-monitoring/analyze.py``. The docstring at the top
+   of the script explains how to use it.
+
+
+Create an Email Alert
+---------------------
+
+Once you're satisfied with an OMS Log Search query string,
 click the **🔔 Alert** icon in the top menu,
 fill in the form,
 and click **Save** when you're done.
diff --git a/k8s/logging-and-monitoring/analyze.py b/k8s/logging-and-monitoring/analyze.py
new file mode 100644
index 00000000..eabb3438
--- /dev/null
+++ b/k8s/logging-and-monitoring/analyze.py
@@ -0,0 +1,77 @@
+"""
+A little Python script to do some analysis of the NGINX logs.
+To get the relevant NGINX logs:
+1. Go to the OMS Portal
+2. Create a new Log Search
+3. Use a search string such as:
+
+Type=ContainerLog Image="bigchaindb/nginx_3scale:1.3" GET NOT("Go-http-client") NOT(runscope)
+
+(This gets all logs from the NGINX container, only those with the word "GET",
+excluding those with the string "Go-http-client" [internal Kubernetes traffic],
+excluding those with the string "runscope" [Runscope tests].)
+
+4. In the left sidebar, at the top, use the dropdown menu to select the time range,
+e.g. "Data based on last 7 days". Pay attention to the number of results and
+the time series chart in the left sidebar. Are there any spikes?
+5. Export the search results. A CSV file will be saved on your local machine.
+6. $ python3 analyze.py logs.csv
+
+Thanks to https://gist.github.com/hreeder/f1ffe1408d296ce0591d
+"""
+
+import sys
+import csv
+import re
+from dateutil.parser import parse
+
+
+lineformat = re.compile(r'(?P<ipaddress>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - - '
+                        r'\[(?P<dateandtime>\d{2}\/[a-z]{3}\/\d{4}:\d{2}:\d{2}:\d{2} '
+                        r'(\+|\-)\d{4})\] ((\"(GET|POST) )(?P<url>.+)(http\/1\.1")) '
+                        r'(?P<statuscode>\d{3}) '
+                        r'(?P<bytessent>\d+) '
+                        r'(["](?P<refferer>(\-)|(.+))["]) '
+                        r'(["](?P<useragent>.+)["])',
+                        re.IGNORECASE)
+
+filepath = sys.argv[1]
+
+logline_list = []
+with open(filepath) as csvfile:
+    csvreader = csv.reader(csvfile, delimiter=',')
+    for row in csvreader:
+        if row and (row[8] != 'LogEntry'):
+            # because the first line is just the column headers, such as 'LogEntry'
+            logline = row[8]
+            print(logline + '\n')
+            logline_data = re.search(lineformat, logline)
+            if logline_data:
+                logline_dict = logline_data.groupdict()
+                logline_list.append(logline_dict)
+                # so logline_list is a list of dicts
+                # print('{}'.format(logline_dict))
+
+# Analysis
+
+total_bytes_sent = 0
+tstamp_list = []
+
+for lldict in logline_list:
+    total_bytes_sent += int(lldict['bytessent'])
+    dt = lldict['dateandtime']
+    # https://tinyurl.com/lqjnhot
+    dtime = parse(dt[:11] + " " + dt[12:])
+    tstamp_list.append(dtime.timestamp())
+
+print('Number of log lines seen: {}'.format(len(logline_list)))
+
+# Time range
+trange_sec = max(tstamp_list) - min(tstamp_list)
+trange_days = trange_sec / 60.0 / 60.0 / 24.0
+print('Time range seen (days): {}'.format(trange_days))
+
+print('Total bytes sent: {}'.format(total_bytes_sent))
+
+print('Average bytes sent per day (out via GET): {}'.
+      format(total_bytes_sent / trange_days))
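
For reference, here is a minimal, self-contained sketch (separate from the patch itself) of how the ``lineformat`` regex in ``analyze.py`` picks apart a single NGINX access-log line, assuming the named groups shown above. The sample log line, IP address and field values are invented purely for illustration::

   import re
   from dateutil.parser import parse

   # Same pattern as in analyze.py: one named group per access-log field.
   lineformat = re.compile(r'(?P<ipaddress>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - - '
                           r'\[(?P<dateandtime>\d{2}\/[a-z]{3}\/\d{4}:\d{2}:\d{2}:\d{2} '
                           r'(\+|\-)\d{4})\] ((\"(GET|POST) )(?P<url>.+)(http\/1\.1")) '
                           r'(?P<statuscode>\d{3}) '
                           r'(?P<bytessent>\d+) '
                           r'(["](?P<refferer>(\-)|(.+))["]) '
                           r'(["](?P<useragent>.+)["])',
                           re.IGNORECASE)

   # An invented example line, in the format the regex expects:
   sample_line = ('203.0.113.7 - - [02/May/2017:13:07:59 +0000] '
                  '"GET /api/v1/ HTTP/1.1" 200 321 "-" "curl/7.51.0"')

   match = lineformat.search(sample_line)
   if match:
       fields = match.groupdict()
       # For this sample, fields['statuscode'] == '200' and fields['bytessent'] == '321'
       print(fields['ipaddress'], fields['statuscode'], fields['bytessent'])

       # The dateandtime group looks like '02/May/2017:13:07:59 +0000'.
       # As in analyze.py, replacing the ':' between the date and the time
       # with a space gives a string that dateutil can parse into a timestamp:
       dt = fields['dateandtime']
       print(parse(dt[:11] + ' ' + dt[12:]).timestamp())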