-
Notifications
You must be signed in to change notification settings - Fork 10
Expand file tree
/
Copy pathsun_one_parser.py
More file actions
69 lines (52 loc) · 1.99 KB
/
sun_one_parser.py
File metadata and controls
69 lines (52 loc) · 1.99 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import re
import os
from datetime import datetime
#example of logs https://github.com/buuren/python/edit/master/done/log_example.txt
# Directory whose entries are scanned as access-log files.
dirname = '/home/user/logs'

# Only lines containing this marker are considered at all.
SUCCESS_MARKER = 'HTTP/1.1" 200'
# Only matched host names containing this substring are counted.
URL_FILTER = 'something'

# The patterns are loop-invariant, so they are compiled once here instead of
# once per log file.
# Captures the hour ("00") from "[06/Apr/2014:00:06:21 +0200]".
time_compile = re.compile(r'\[.*?:(\d+):.*?\]')
# Finds "www.google.com" in "https://www.google.com/".
# NOTE(review): the dots are unescaped and therefore match any character;
# the pattern is kept unchanged so that existing counts stay the same.
url_compile = re.compile(r'www.\w+.\w+')
# Finds "83.215.5.41" in '83.215.5.41 - - [06/Apr/2014:00:07:07 +0200] "GET'.
# The match also consumes one trailing non-digit character; parse_line
# removes it.
ip_compile = re.compile(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}[^0-9]')


def parse_line(line):
    """Return ``(url, ip)`` if *line* passes every filter, else ``None``.

    A line is kept when it contains ``HTTP/1.1" 200``, the hour captured by
    ``time_compile`` is >= 20 or <= 1, ``url_compile`` finds a host name that
    contains ``something``, and ``ip_compile`` finds an address.  Lines
    without a timestamp or an address are skipped rather than raising
    ``AttributeError`` on a failed ``re.search``.
    """
    if SUCCESS_MARKER not in line:
        return None

    time_match = time_compile.search(line)
    if time_match is None:
        return None
    # Compare numerically; a string comparison is only right for two digits.
    hour = int(time_match.group(1))
    if 1 < hour < 20:
        return None

    url_match = url_compile.search(line)
    if url_match is None:
        return None
    url = url_match.group()
    if URL_FILTER not in url:
        return None

    ip_match = ip_compile.search(line)
    if ip_match is None:
        return None
    # Drop the trailing delimiter matched by [^0-9] so that the same address
    # followed by different characters is not counted twice.
    return url, ip_match.group()[:-1]


def count_hits(lines, hits=None):
    """Tally accepted lines per URL and per address.

    Args:
        lines: iterable of log lines (an open text file works).
        hits: optional ``{url: {ip: count}}`` dict that is updated in place,
            so several files can be accumulated into one result.

    Returns:
        The ``{url: {ip: count}}`` dict.
    """
    if hits is None:
        hits = {}
    for line in lines:
        parsed = parse_line(line)
        if parsed is None:
            continue
        url, ip = parsed
        per_ip = hits.setdefault(url, {})
        per_ip[ip] = per_ip.get(ip, 0) + 1
    return hits


def report(hits):
    """Print ``url<TAB>number of distinct addresses`` per URL, then a total.

    Returns:
        The total that was printed.
    """
    total = 0
    for url, per_ip in hits.items():
        print('%s\t%s' % (url, len(per_ip)))
        total += len(per_ip)
    print("")
    # NOTE(review): the label says "requests", but the value is the sum of
    # distinct addresses per URL; the per-address counts are never printed.
    # Label left unchanged to keep the output format stable.
    print('Total requests: %s' % total)
    return total


def main(log_dir=dirname):
    """Scan every file in *log_dir*, print progress, timing and the report.

    Args:
        log_dir: directory containing the log files.

    Returns:
        The accumulated ``{url: {ip: count}}`` dict.
    """
    start_time = datetime.now()
    hits = {}
    for filename in os.listdir(log_dir):
        full_filename = os.path.join(log_dir, filename)
        # os.listdir also yields sub-directories, which cannot be opened.
        if not os.path.isfile(full_filename):
            continue
        # The context manager closes each log file as soon as it is read.
        with open(full_filename, 'r') as log_file:
            count_hits(log_file, hits)
        print('File done: %s' % full_filename)
        print(datetime.now() - start_time)
    report(hits)
    print(datetime.now() - start_time)
    return hits


if __name__ == '__main__':
    main()