"""
Analyze server stats for specific things related to onion services
version 0.2
"""
import os
import datetime
import click
import sh
import logging
from dotenv import load_dotenv
import sqlalchemy as db
from system_utilities import get_configs
from simple_AWS.s3_functions import *
from log_reporting_utilities import analyze_file, analyze_data, output, filter_and_get_date
from db_utilities import report_save, get_sys_info
from azure_utilities import retrieve_logs
logger = logging.getLogger('logger')
@click.command()
@click.option('--percent', type=int, help="Floor percentage to display for agents and codes (default is 1%)", default=1)
@click.option('--num', type=int, help="Top number of pages to display (default is 30)", default=30)
@click.option('--unzip', is_flag=True, help="Process zipped log files", default=False)
@click.option('--daemon', is_flag=True, default=False, help="Run in daemon mode. All output goes to a file.")
@click.option('--range', type=int, help="Days of log file age to analyze. Default is 7", default=7)
@click.option('--domain', type=str, help="Domain to analyze. Default is 'all'", default='all')
def analyze(unzip, percent, num, daemon, range, domain):
    import faulthandler
    faulthandler.enable()
    # update system info
    last_logfile_analysis = get_sys_info(request='last_logfile_analysis', update=True)
    configs = get_configs()
    now = datetime.datetime.now()
    now_string = now.strftime('%d-%b-%Y:%H:%M:%S')
    load_dotenv()
    engine = db.create_engine(os.environ['DATABASE_URL'])
    connection = engine.connect()
    metadata = db.MetaData()
    domains = db.Table('domains', metadata, autoload=True, autoload_with=engine)
    domains_list = []
    query = db.select([domains])
    result = connection.execute(query).fetchall()
    for line in result:
        domains_list.append({'id': line[0], 'name': line[1], 's3_bucket': line[4], 'azure_profile': line[5]})
    for dm in domains_list:
        if (domain == 'all') or (dm['name'] == domain):
            # First, is there an Azure profile set?
            if ('azure_profile' in dm) and dm['azure_profile']:
                logger.debug(f"Domain: {dm['name']}: Azure Profile: {dm['azure_profile']}")
                retrieve_logs(profile_name=dm['azure_profile'], range=range, s3_bucket=dm['s3_bucket'])
            try:
                s3simple = S3Simple(region_name=configs['region'],
                                    profile=configs['profile'],
                                    bucket_name=dm['s3_bucket'])
            except Exception:
                logger.warning(f"No bucket set for domain {dm['name']}")
                continue
            # get the file list to analyze, read from S3
            #logger.debug(f"Getting files from S3 bucket {dm['s3_bucket']}...")
            file_list = s3simple.s3_bucket_contents()
            if not file_list:
                continue
            logger.debug(f"File List: {file_list}")
            compiled_data = {
                'nginx': [],
                'cloudfront': [],
                'fastly': [],
                'azure': []
            }
            logger.debug(f"Analyzing {dm['name']}...")
            for ifile in file_list:
                # skip previously generated analysis output
                if 'LogAnalysis' in ifile:
                    continue
                # skip compressed logs unless --unzip was given
                if ('.gz' in ifile or '.bz2' in ifile) and not unzip:
                    continue
                logger.debug(f"Processing file: {ifile}")
                # keys ending in '/' are directories: mirror them locally, then move on
                if ifile[-1] == '/':
                    directory = configs['local_tmp'] + '/' + ifile
                    if not os.path.isdir(directory):
                        os.mkdir(directory)
                    continue
                file_date = filter_and_get_date(ifile)
                if not file_date:
                    logger.warning("Couldn't find a date in the log file name!")
                    continue
                numdays = (now - file_date).days
                if numdays > range:
                    continue
                # download
                local_path = configs['local_tmp'] + '/' + ifile
                #logger.debug(f"Downloading ... domain: {dm['name']} to {local_path}")
                try:
                    s3simple.download_file(file_name=ifile, output_file=local_path)
                except Exception:
                    continue
                # Add to aggregate
                file_parts = ifile.split('.')
                ext = file_parts[-1]
                if ext == 'bz2' or ext == 'gz':
                    if unzip:
                        if ext == 'bz2':
                            raw_data = str(sh.bunzip2("-k", "-c", local_path))
                        else:
                            raw_data = str(sh.gunzip("-k", "-c", local_path))
                    else:
                        continue
                else:
                    with open(local_path) as f:
                        raw_data = f.read()
                #logger.debug(f"Files data: {raw_data}")
                compiled_log_data, log_type = analyze_file(raw_data, dm['name'])
                if not compiled_log_data:
                    logger.warning("No Data!")
                    continue
                compiled_data[log_type] += compiled_log_data
                #logger.debug(f"Deleting local temporary file {local_path}...")
                os.remove(local_path)
            for log_type in compiled_data:
                logger.debug(f"Log type: {log_type}")
                #logger.debug(f"Analyzed data: {compiled_data[log_type]}")
                if not compiled_data[log_type]:
                    continue
                analyzed_log_data = analyze_data(compiled_data[log_type], log_type)
                (output_text, first_date, last_date, hits, home_page_hits) = output(
                    domain=dm['name'],
                    data=analyzed_log_data,
                    percent=percent,
                    num=num)
                logger.debug(output_text)
                logger.debug("Saving log analysis file...")
                key = 'LogAnalysis_' + dm['name'] + '_' + log_type + '_' + now_string + '.json'
                body = str(analyzed_log_data)
                s3simple.put_to_s3(key=key, body=body)
                logger.debug("Saving output file...")
                key = 'LogAnalysisOutput_' + dm['name'] + '_' + log_type + '_' + now_string + '.txt'
                s3simple.put_to_s3(key=key, body=output_text)
                logger.debug("Sending Report to Database...")
                report_save(
                    domain=dm['name'],
                    datetime=now,
                    report_text=output_text,
                    hits=hits,
                    home_page_hits=home_page_hits,
                    first_date_of_log=first_date,
                    last_date_of_log=last_date,
                    log_type=log_type
                )
    return

if __name__ == '__main__':
    configs = get_configs()
    log = configs['log_level']
    logger = logging.getLogger('logger')  # instantiate logger
    logger.setLevel(logging.DEBUG)  # pass DEBUG and higher values to the handler
    ch = logging.StreamHandler()  # StreamHandler writes to stderr by default
    ch.setLevel(log)  # the handler uses the configured log level
    # create formatter:
    # display the function name and logging level in columnar format
    formatter = logging.Formatter('[%(funcName)24s] [%(levelname)8s] %(message)s')
    # add formatter to ch
    ch.setFormatter(formatter)
    logger.addHandler(ch)

    analyze()