Merge pull request #1498 from bigchaindb/py-script-to-analyze-nginx-logs
More docs re/ OMS logs + a script to analyze NGINX logs
Commit 0c1b30b59d
@@ -193,30 +193,55 @@ simply run the following command:

   $ kubectl create -f oms-daemonset.yaml

Search the OMS Logs
-------------------

OMS should now be getting, storing and indexing all the logs
from all the containers in your Kubernetes cluster.
You can search the OMS logs from the Azure Portal
or the OMS Portal, but at the time of writing,
there was more functionality in the OMS Portal
(e.g. the ability to create an Alert based on a search).

There are instructions to get to the OMS Portal
in the section titled :ref:`Deploy the OMS Agents` above.
Once you're in the OMS Portal, click on **Log Search**
and enter a query.
Here are some example queries:

All logging messages containing the strings "critical" or "error" (not case-sensitive):

``Type=ContainerLog (critical OR error)``

If you don't see any query results,
try experimenting with the query string and time range
to convince yourself that it's working.

.. note::

    You can filter the results even more by clicking on things in the left sidebar.
    For OMS Log Search syntax help, see the
    `Log Analytics search reference <https://docs.microsoft.com/en-us/azure/log-analytics/log-analytics-search-reference>`_.

All logging messages containing the string "error" but not "404":
|
||||
|
||||
``Type=ContainerLog error NOT(404)``
|
||||
|
||||
All logging messages containing the string "critical" but not "CriticalAddonsOnly":
|
||||
|
||||
``Type=ContainerLog critical NOT(CriticalAddonsOnly)``
|
||||
|
||||
All logging messages from containers running the Docker image bigchaindb/nginx_3scale:1.3, containing the string "GET" but not the strings "Go-http-client" or "runscope" (where those exclusions filter out tests by Kubernetes and Runscope):
|
||||
|
||||
``Type=ContainerLog Image="bigchaindb/nginx_3scale:1.3" GET NOT("Go-http-client") NOT(runscope)``
|
||||
|
||||
.. note::
|
||||
|
||||
We wrote a small Python 3 script to analyze the logs found by the above NGINX search.
|
||||
It's in ``k8s/logging-and-monitoring/analyze.py``. The docsting at the top
|
||||
of the script explains how to use it.
|
||||
|
||||
|
||||
Create an Email Alert
---------------------

Suppose you want to get an email whenever there's a logging message
with the CRITICAL or ERROR logging level from any container.
At the time of writing, it wasn't possible to create email alerts
using the Azure Portal (as far as we could tell),
but it *was* possible using the OMS Portal.
Once you're satisfied with an OMS Log Search query string,
click the **🔔 Alert** icon in the top menu,
fill in the form,
and click **Save** when you're done.


k8s/logging-and-monitoring/analyze.py (new file, 77 lines)
@@ -0,0 +1,77 @@

"""
|
||||
A little Python script to do some analysis of the NGINX logs.
|
||||
To get the relevant NGINX logs:
|
||||
1. Go to the OMS Portal
|
||||
2. Create a new Log Search
|
||||
3. Use a search string such as:
|
||||
|
||||
Type=ContainerLog Image="bigchaindb/nginx_3scale:1.3" GET NOT("Go-http-client") NOT(runscope)
|
||||
|
||||
(This gets all logs from the NGINX container, only those with the word "GET",
|
||||
excluding those with the string "Go-http-client" [internal Kubernetes traffic],
|
||||
excluding those with the string "runscope" [Runscope tests].)
|
||||
|
||||
4. In the left sidebar, at the top, use the dropdown menu to select the time range,
|
||||
e.g. "Data based on last 7 days". Pay attention to the number of results and
|
||||
the time series chart in the left sidebar. Are there any spikes?
|
||||
5. Export the search results. A CSV file will be saved on your local machine.
|
||||
6. $ python3 analyze.py logs.csv
|
||||
|
||||
Thanks to https://gist.github.com/hreeder/f1ffe1408d296ce0591d
|
||||
"""
|
||||
|
||||
import sys
import csv
import re

from dateutil.parser import parse


lineformat = re.compile(r'(?P<ipaddress>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - - '
                        r'\[(?P<dateandtime>\d{2}\/[a-z]{3}\/\d{4}:\d{2}:\d{2}:\d{2} '
                        r'(\+|\-)\d{4})\] ((\"(GET|POST) )(?P<url>.+)(http\/1\.1")) '
                        r'(?P<statuscode>\d{3}) '
                        r'(?P<bytessent>\d+) '
                        r'(["](?P<referrer>(\-)|(.+))["]) '
                        r'(["](?P<useragent>.+)["])',
                        re.IGNORECASE)
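
# Note: a hypothetical example of the kind of NGINX access-log line the regex
# above is meant to match (illustrative only, not taken from real logs):
#   10.0.0.1 - - [13/Jun/2017:10:20:30 +0000] "GET /api/v1/transactions HTTP/1.1" 200 512 "-" "curl/7.52.1"
# re.search(lineformat, ...) on such a line yields the named groups ipaddress,
# dateandtime, url, statuscode, bytessent, referrer and useragent.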

filepath = sys.argv[1]

logline_list = []
with open(filepath) as csvfile:
    csvreader = csv.reader(csvfile, delimiter=',')
    for row in csvreader:
        if row and (row[8] != 'LogEntry'):
            # because the first line is just the column headers, such as 'LogEntry'
            logline = row[8]
            print(logline + '\n')
            logline_data = re.search(lineformat, logline)
            if logline_data:
                logline_dict = logline_data.groupdict()
                logline_list.append(logline_dict)
                # so logline_list is a list of dicts
                # print('{}'.format(logline_dict))


# Analysis

total_bytes_sent = 0
tstamp_list = []

for lldict in logline_list:
    total_bytes_sent += int(lldict['bytessent'])
    dt = lldict['dateandtime']
    # Rewrite e.g. '13/Jun/2017:10:20:30 +0000' as '13/Jun/2017 10:20:30 +0000'
    # so dateutil can parse it. See https://tinyurl.com/lqjnhot
    dtime = parse(dt[:11] + " " + dt[12:])
    tstamp_list.append(dtime.timestamp())

print('Number of log lines seen: {}'.format(len(logline_list)))

# Time range
trange_sec = max(tstamp_list) - min(tstamp_list)
trange_days = trange_sec / 60.0 / 60.0 / 24.0
print('Time range seen (days): {}'.format(trange_days))

print('Total bytes sent: {}'.format(total_bytes_sent))

print('Average bytes sent per day (out via GET): {}'.
      format(total_bytes_sent / trange_days))
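
As a possible extension (not part of the committed script), one could also tally
the parsed log lines by HTTP status code; a minimal sketch, assuming the
``logline_list`` built by the script above:

    from collections import Counter

    # Count how many log lines were seen for each HTTP status code.
    status_counts = Counter(d['statuscode'] for d in logline_list)
    for code, count in status_counts.most_common():
        print('{}: {}'.format(code, count))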