added little py script to do some nginx logs analysis

2024-10-13 13:34:05 +00:00 · 2017-05-24 15:02:06 +02:00
parent 488074fa09
commit 8f6e7fc735
1 changed files with 74 additions and 0 deletions
--- a/k8s/logging-and-monitoring/analyze.py
+++ b/k8s/logging-and-monitoring/analyze.py
@@ -0,0 +1,74 @@
+"""
+A little Python script to do some analysis of the NGINX logs.
+To get the relevant NGINX logs:
+1. Go to the OMS Portal
+2. Create a new Log Search
+3. Use a search string such as:
+
+Type=ContainerLog Image="bigchaindb/nginx_3scale:1.3" GET NOT("Go-http-client") NOT(runscope)
+
+(This gets all logs from the NGINX container, only those with the word "GET", excluding those with the string "Go-http-client" [internal Kubernetes traffic], excluding those with the string "runscope" [Runscope tests].)
+
+4. In the left sidebar, at the top, use the dropdown menu to select the time range, e.g. "Data based on last 7 days". Pay attention to the number of results and the time series chart in the left sidebar. Are there any spikes?
+5. Export the search results. A CSV file will be saved on your local machine.
+6. $ python3 analyze.py logs.csv
+
+Thanks to https://gist.github.com/hreeder/f1ffe1408d296ce0591d
+"""
+
+import sys
+import csv
+import re
+import datetime
+from dateutil.parser import parse
+
+
+lineformat = re.compile(r"""(?P<ipaddress>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - - \[(?P<dateandtime>\d{2}\/[a-z]{3}\/\d{4}:\d{2}:\d{2}:\d{2} (\+|\-)\d{4})\] ((\"(GET|POST) )(?P<url>.+)(http\/1\.1")) (?P<statuscode>\d{3}) (?P<bytessent>\d+) (["](?P<refferer>(\-)|(.+))["]) (["](?P<useragent>.+)["])""", re.IGNORECASE)
+
+filepath = sys.argv[1]
+
+# Parse the exported CSV; logline_list ends up as a list of dicts, one per
+# log line that matches lineformat.
+logline_list = []
+# newline='' lets the csv module handle newlines embedded in quoted fields
+with open(filepath, newline='') as csvfile:
+    for row in csv.reader(csvfile, delimiter=','):
+        # Column 8 holds the raw NGINX log line. Skip short/blank rows and
+        # the header row, whose column 8 is the literal 'LogEntry'.
+        if len(row) <= 8 or row[8] == 'LogEntry':
+            continue
+        print(row[8] + '\n')
+        logline_data = lineformat.search(row[8])
+        if logline_data:
+            logline_list.append(logline_data.groupdict())
+
+# Example logline:
+
+# 95.91.211.240 - - [22/May/2017:13:23:21 +0000] "GET /api/v1/statuses?tx_id=2306f34f6a98f1754e1048e8a71cc6b2d01ff594b08f6def88e15931caaaca98 HTTP/1.1" 200 120 "-" "python-requests/2.13.0" 
+
+# Example logline_dict:
+
+# {'statuscode': '200', 'url': '/api/v1/statuses?tx_id=2306f34f6a98f1754e1048e8a71cc6b2d01ff594b08f6def88e15931caaaca98 ', 'dateandtime': '22/May/2017:13:23:21 +0000', 'useragent': 'python-requests/2.13.0', 'refferer': '-', 'bytessent': '120', 'ipaddress': '95.91.211.240'}
+
+# Analysis
+
+total_bytes_sent = 0
+tstamp_list = []
+
+for lldict in logline_list:
+    total_bytes_sent += int(lldict['bytessent'])
+    dt = lldict['dateandtime']
+    # Swap the ':' between the date and the time for a space before handing
+    # the string to dateutil's parse (https://tinyurl.com/lqjnhot)
+    tstamp_list.append(parse(dt[:11] + " " + dt[12:]).timestamp())
+
+print('Number of log lines seen: {}'.format(len(logline_list)))
+
+# Time range; default=0 avoids a ValueError when no log line matched
+trange_sec = max(tstamp_list, default=0) - min(tstamp_list, default=0)
+trange_days = trange_sec / 60.0 / 60.0 / 24.0
+print("Time range seen (days): {}".format(trange_days))
+print("Total bytes sent: {}".format(total_bytes_sent))
+
+# A zero time range (zero or one distinct timestamp) has no per-day average
+print("Average bytes sent per day (out via GET): {}".format(total_bytes_sent / trange_days if trange_days else "n/a"))