TestLogFileAnalyzer.py
"""
Test: LogFileAnalyzer
Description:
Test the LogFileAnalyzer class.
"""
import os
import shutil
import unittest
import pandas
from datetime import datetime
from log_reader.LogFileAnalyzer import LogFileAnalyzer
from log_reader.utilities import StrPattern
class TestLogFileAnalyzer(unittest.TestCase):
def setUp(self):
"""
        Set-up for each test, such as copying a test file to a tmp directory.
"""
        source_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "data", "log.json")
self._test_log = os.path.join("/", "tmp", "test.json")
shutil.copyfile(source_file, self._test_log)
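        # An empty scratch file is also created in /tmp; test_set_properties points
        # log_path at it, since assigning log_path appears to require an existing file.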
path = os.path.join('/', 'tmp', 'somefile.txt')
with open(path, 'a'):
os.utime(path, None)
def tearDown(self):
"""
        Clean up after each test has finished.
"""
        os.remove(self._test_log)
        # Remove the scratch file created in setUp as well.
        os.remove(os.path.join('/', 'tmp', 'somefile.txt'))
def test_create_instance(self):
test = LogFileAnalyzer()
        self.assertEqual(test.data_frame, None, msg="Initialized Log Analyzer does not have a null data frame.")
        self.assertEqual(test.log_path, None, msg="Initialized Log Analyzer does not have a null log file.")
        self.assertEqual(set(test.valid_keys.keys()), set(["ts", "pt", "si", "uu", "bg", "sha", "nm", "ph", "dp"]),
                         msg="Initialized Log Analyzer does not have the default valid key dictionary.")
        test = LogFileAnalyzer(log_path=self._test_log)
        self.assertEqual(test.data_frame, None, msg="Initialized Log Analyzer does not have a null data frame.")
        self.assertEqual(test.log_path, self._test_log, msg="Initialized Log Analyzer's log path is not the test log.")
        test.valid_keys = None
        self.assertEqual(test.valid_keys, None,
                         msg="Initialized Log Analyzer does not have a null valid key dictionary.")
def test_set_properties(self):
"""
Test setting the properties of the class.
"""
test = LogFileAnalyzer()
# Test valid values
test_path = os.path.join('/', 'tmp', 'somefile.txt')
        test.log_path = test_path
self.assertEqual(test.log_path, test_path, msg="log path was not set properly.")
test_keys = {"a": int, "b": str, "c": str, "d": list}
        test.valid_keys = test_keys
self.assertEqual(test.valid_keys, test_keys, msg="valid keys was not set properly.")
# Test invalid log path values
test = LogFileAnalyzer()
with self.assertRaises(AssertionError):
test.log_path = 9
with self.assertRaises(AssertionError):
test.log_path = os.path.join('/', 'bad_dir', 'bad_file.txt')
with self.assertRaises(AssertionError):
test.log_path = pandas.DataFrame()
# Test invalid valid key values
with self.assertRaises(AssertionError):
test.valid_keys = 234.234
        with self.assertRaises(AssertionError):
            test.valid_keys = [1, 2, 3, 4, 5]
        # Try to set the data frame, which is not allowed
        df = pandas.DataFrame()
        with self.assertRaises(AttributeError):
            test.data_frame = df
def test_validate_row(self):
"""
        Test the protected method that does most of the grunt work of
        verifying the values in a row of data.
"""
test = LogFileAnalyzer(log_path=self._test_log)
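        # Each entry in valid_keys maps a field name to a rule dictionary: "type"
        # gives the expected Python type; strings may carry "min_len"/"max_len" or an
        # allowed "values" list; ints may carry "min"/"max"; StrPattern fields carry a
        # regex "pattern"; and the datetime field here carries a "min" bound, as
        # exercised by the assertions below.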
test.valid_keys = { "a": {"type": str, "max_len": 10, "min_len": 2},
"b": {"type": int, "min": -10, "max": 10 },
"c": {"type": StrPattern, "pattern" : "([\w.]+)@([\w.]+)"},
"d": {"type": datetime, "min" : 1000 },
"e": {"type": str, "values": ["a", "b", "c"]},
}
# Valid row tests
row = {"a": "some text", "b" : 0, "c": "[email protected]", "d" : "2019-05-22 10:30:23", "e" : "c"}
ok, err = test._validate_row(row)
self.assertTrue(ok, msg="First validate row test failed.")
row["a"]="12"; row["b"] = -10
ok, err = test._validate_row(row)
self.assertTrue(ok, msg="Second validate row test failed.")
row["a"]="0123456789"; row["b"]=10; row["e"]="a"
ok, err = test._validate_row(row)
self.assertTrue(ok, msg="Third validate row test failed.")
        # Invalid row tests
row["a"]=""
ok, err = test._validate_row(row)
self.assertFalse(ok, msg="First invalid row test failed.")
row["a"]="01234567890"
ok, err = test._validate_row(row)
self.assertFalse(ok, msg="Second invalid row test failed.")
row["a"]="a string"; row["b"]=-11
ok, err = test._validate_row(row)
self.assertFalse(ok, msg="Third invalid row test failed.")
row["b"]=11
ok, err = test._validate_row(row)
self.assertFalse(ok, msg="Fourth invalid row test failed.")
row["b"]=0; row["c"]="a non email"
ok, err = test._validate_row(row)
self.assertFalse(ok, msg="Fifth invalid row test failed.")
row["c"]="[email protected]"; row["d"]=0
ok, err = test._validate_row(row)
self.assertFalse(ok, msg="Sixth invalid row test failed.")
row["d"]="Apr 1, 2019"
ok, err = test._validate_row(row)
self.assertFalse(ok, msg="Seventh invalid row test failed.")
row["d"]="2019-03-14 17:23:22"; row["e"]=3
ok, err = test._validate_row(row)
self.assertFalse(ok, msg="Eigth invalid row test failed.")
def test_reading_a_file(self):
"""
        Test reading the data from a file and validating it.
"""
        # Read a file containing 9985 valid records. The invalid records
        # each have at least one field that does not match the expected
        # values used in the example.
test = LogFileAnalyzer(log_path=self._test_log)
num_valid_records = test.read_and_validate()
self.assertEqual(num_valid_records, 9985, msg='The wrong number of valid records were found in the test file.')
# TODO test with a number of bad files
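        # e.g. a missing file, a file that is not valid JSON, and a file where
        # every record fails validation.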
    def test_show_file_type_counts(self):
        """
        Test the function that shows the file extension counts.
"""
test = LogFileAnalyzer(log_path=self._test_log)
        # This is an error: the file must be read and validated first.
with self.assertRaises(AssertionError):
test.show_file_type_counts()
        # The expected results are based on counts obtained manually with grep.
        # TODO: keep track of the invalid entries so that the grep step
        # could be automated.
test.read_and_validate()
df = test.show_file_type_counts()
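        # Judging by the assertions below, show_file_type_counts() returns a
        # pandas Series-like object keyed by file extension.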
self.assertEqual(208, df['class'], msg="Number of .class files found differs.")
self.assertEqual(174, df['mid'], msg="Number of .mid files found differs.")
self.assertEqual(155, df['xls'], msg="Number of .xls files found differs.")
self.assertEqual(9985, df.sum(), msg="Sum of all the counts is not equal to number of valid records.")
# TODO test with a number of different bad files