# Copyright © 2020 Interplanetary Database Association e.V.,
# Planetmint and IPDB software contributors.
# SPDX-License-Identifier: (Apache-2.0 AND CC-BY-4.0)
# Code is Apache-2.0 and docs are CC-BY-4.0

"""A little Python script to do some analysis of the NGINX logs.

To get the relevant NGINX logs:

1. Go to the OMS Portal
2. Create a new Log Search
3. Use a search string such as:

   Type=ContainerLog Image="planetmint/nginx_3scale:1.3" GET NOT("Go-http-client") NOT(runscope)

   (This gets all logs from the NGINX container, keeping only those
   containing the word "GET", excluding those containing the string
   "Go-http-client" [internal Kubernetes traffic], and excluding those
   containing the string "runscope" [Runscope tests].)

4. In the left sidebar, at the top, use the dropdown menu to select the
   time range, e.g. "Data based on last 7 days". Pay attention to the
   number of results and the time series chart in the left sidebar.
   Are there any spikes?
5. Export the search results. A CSV file will be saved on your local machine.
6. $ python3 analyze.py logs.csv

Thanks to https://gist.github.com/hreeder/f1ffe1408d296ce0591d
"""

import sys
import csv
import re

from dateutil.parser import parse

lineformat = re.compile(
    r"(?P<ipaddress>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - - "
    r"\[(?P<dateandtime>\d{2}\/[a-z]{3}\/\d{4}:\d{2}:\d{2}:\d{2} "
    r'(\+|\-)\d{4})\] ((\"(GET|POST) )(?P<url>.+)(http\/1\.1")) '
    r"(?P<statuscode>\d{3}) "
    r"(?P<bytessent>\d+) "
    r'(["](?P<referrer>(\-)|(.+))["]) '
    r'(["](?P<useragent>.+)["])',
    re.IGNORECASE,
)

filepath = sys.argv[1]

logline_list = []
with open(filepath) as csvfile:
    csvreader = csv.reader(csvfile, delimiter=",")
    for row in csvreader:
        # Skip the header row; its 9th column is the literal string 'LogEntry'.
        # Data rows carry the actual NGINX log line in that same column.
        if row and (row[8] != "LogEntry"):
            logline = row[8]
            print(logline + "\n")
            logline_data = re.search(lineformat, logline)
            if logline_data:
                logline_dict = logline_data.groupdict()
                logline_list.append(logline_dict)
                # so logline_list is a list of dicts
                # print('{}'.format(logline_dict))

# Analysis
total_bytes_sent = 0
tstamp_list = []

for lldict in logline_list:
    total_bytes_sent += int(lldict["bytessent"])
    dt = lldict["dateandtime"]
    # Replace the ':' between the date and the time with a space
    # so dateutil can parse it. See https://tinyurl.com/lqjnhot
    dtime = parse(dt[:11] + " " + dt[12:])
    tstamp_list.append(dtime.timestamp())

print("Number of log lines seen: {}".format(len(logline_list)))

# Time range
trange_sec = max(tstamp_list) - min(tstamp_list)
trange_days = trange_sec / 60.0 / 60.0 / 24.0
print("Time range seen (days): {}".format(trange_days))

print("Total bytes sent: {}".format(total_bytes_sent))

print("Average bytes sent per day (out via GET): {}".format(total_bytes_sent / trange_days))
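
# A quick sanity check for the regex above. The sample log line below is
# illustrative (a hypothetical NGINX access-log entry, not taken from real
# logs), but it has the shape `lineformat` expects, so you can try it in a
# REPL after importing this module:
#
#     >>> sample = ('93.184.216.34 - - [12/Mar/2020:13:55:36 +0000] '
#     ...           '"GET /api/v1/ HTTP/1.1" 200 1234 "-" "curl/7.58.0"')
#     >>> re.search(lineformat, sample).groupdict()["statuscode"]
#     '200'
#     >>> re.search(lineformat, sample).groupdict()["bytessent"]
#     '1234'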