-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcountry_inference.py
133 lines (125 loc) · 5.48 KB
/
country_inference.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import sys
import os
from pathsim import *
import csv
import urllib
import gzip
def ip_in_country(ip, subnets):
    """
    Returns True if {ip} is in the range of one of the {subnets},
    Returns False otherwise.

    {ip} is a dotted-decimal IPv4 address string (e.g. '10.0.0.1').
    {subnets} is an iterable of 'range_start,range_end' strings whose two
    bounds are dotted-decimal IPv4 addresses; both bounds are inclusive.

    Raises ValueError if an octet is not an integer or if a subnet entry
    does not contain exactly one comma.
    """
    ipv4 = [int(n) for n in ip.split('.')]
    for subnet_range in subnets:
        range_start_full, range_end_full = subnet_range.split(',')
        range_start = [int(n) for n in range_start_full.split('.')]
        range_end = [int(n) for n in range_end_full.split('.')]
        # Lists compare lexicographically, most significant octet first, which
        # for equal-length octet lists is the numeric ordering of the
        # addresses. One chained comparison therefore covers every case the
        # hand-written per-octet ladder tried to enumerate.
        if range_start <= ipv4 <= range_end:
            return True
    return False
if __name__ == '__main__':
    # Script entry point: find the guard/exit IPs in the log files that fall
    # inside the subnets of the requested country, and write them out, one IP
    # per line, to two files in the output directory.
    usage = ('Usage: country_inference.py [country code] [logs_in_dir] [results_out_dir] \n'
             'Extracts the guard/exit IPs contained in [logs_in_dir] belonging to the country '
             '[country code], and writes them in '
             '[results_out_dir/[country code]_guards] (guard IPs) and '
             '[results_out_dir/[country code]_exits] (exit IPs)')
    if len(sys.argv) < 4:
        print(usage)
        sys.exit(1)

    searched_country_code = sys.argv[1]
    in_dir = sys.argv[2]
    out_dir = sys.argv[3]

    # Gather every non-hidden file below in_dir (symlinked directories are
    # followed) and process them ordered by base name.
    log_files = []
    for dirpath, dirnames, filenames in os.walk(in_dir, followlinks=True):
        for filename in filenames:
            if not filename.startswith('.'):
                log_files.append(os.path.join(dirpath, filename))
    log_files.sort(key=os.path.basename)

    # Prepare the country subnets in DictReader.
    # The table is downloaded into the current working directory on each run.
    # NOTE(review): urllib.URLopener at this import path is the Python 2 API;
    # confirm the target interpreter before porting this script.
    subnets_country_file = urllib.URLopener()
    subnets_country_file.retrieve("https://iptoasn.com/data/ip2country-v4.tsv.gz", "ip2country-v4.tsv.gz")
    subnets = []
    with gzip.open('ip2country-v4.tsv.gz', 'rb') as csvfile:
        countryreader = csv.DictReader(csvfile, ['range_start', 'range_end', 'country_code'], dialect='excel-tab')
        for row in countryreader:
            if row['country_code'] == searched_country_code:
                # Stored as 'start,end', the format ip_in_country() parses.
                subnets.append(row['range_start'] + ',' + row['range_end'])

    # Add guards and exits belonging to the searched country for the
    # guards/exits IP contained in log_files.
    # The lists keep first-seen order for the output files. The sets remember
    # every IP already tested (matching or not), so each distinct IP is
    # checked against the subnets only once.
    country_guards = []
    country_exits = []
    checked_guards = set()
    checked_exits = set()
    for log_file in log_files:
        with open(log_file, 'r') as lf:
            lf.readline()  # skip the header line
            for line in lf:
                # Strip only the newline so that a final line without one
                # does not lose its last character.
                line_fields = line.rstrip('\n').split('\t')
                # Tab-separated columns 2 and 3 hold the guard and exit IPs;
                # the other columns are not needed here.
                guard_ip = line_fields[2]
                exit_ip = line_fields[3]
                if guard_ip not in checked_guards:
                    checked_guards.add(guard_ip)
                    if ip_in_country(guard_ip, subnets):
                        country_guards.append(guard_ip)
                if exit_ip not in checked_exits:
                    checked_exits.add(exit_ip)
                    if ip_in_country(exit_ip, subnets):
                        country_exits.append(exit_ip)

    guards_file = os.path.join(out_dir, searched_country_code + "_guards")
    exits_file = os.path.join(out_dir, searched_country_code + "_exits")
    # Write all the Country IPs to the specified files; the with-statement
    # closes both files, so no explicit close() calls are needed.
    with open(guards_file, 'w') as gf, open(exits_file, 'w') as ef:
        for country_guard in country_guards:
            gf.write("%s\n" % country_guard)
        for country_exit in country_exits:
            ef.write("%s\n" % country_exit)