-
Notifications
You must be signed in to change notification settings - Fork 0
/
vulnerability_counter.py
executable file
·127 lines (95 loc) · 4.32 KB
/
vulnerability_counter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#!/usr/bin/env python3
# 1st-party
import csv
import json
import logging
import math
import os
import sys
# 2nd-party
import translation_cache
# Start of the experiment as a Unix timestamp (seconds); requests are bucketed
# into whole days counted from this instant (2014-03-21 00:00:00 UTC).
SINCE_TIMESTAMP = 1395360000
# Number of per-day data points the experiment is expected to produce.
NUMBER_OF_DAYS = 30
# 60 seconds * 60 minutes * 24 hours.
NUMBER_OF_SECONDS_IN_A_DAY = 60 * 60 * 24
def traverse_event_log(simple_log_filename, safe_packages, unsafe_packages):
    """Replay the request log and count cumulative vulnerable users per day.

    Every row of the CSV log must have exactly four fields:
    ``timestamp, ip_address, url, user_agent``.  The requested package is
    inferred from the URL with ``translation_cache.infer_package_name``.  A
    user (identified by IP address) becomes "unsafe" as soon as they request
    any package that is not in ``safe_packages``; users are never removed, so
    the unsafe count is non-decreasing.  Requests for packages that are in
    neither set are additionally tallied as "missed".

    Args:
        simple_log_filename: Path of the CSV request log.  Rows must be in
            non-decreasing timestamp order.
        safe_packages: Container of package names considered safe.
        unsafe_packages: Container of package names considered unsafe.

    Returns:
        A list of ``NUMBER_OF_DAYS`` ints.  Entry ``j`` is the number of
        distinct unsafe users seen up to and including the last request
        logged on day ``j``, where days are counted from ``SINCE_TIMESTAMP``.

    Raises:
        ValueError: If the log contains no rows.
        AssertionError: If a sanity check fails (a package is in both sets,
            timestamps go backwards, 1% or more of the requests are missed,
            or the log does not span exactly ``NUMBER_OF_DAYS`` distinct
            days).
        KeyError: If the distinct days seen are not exactly
            ``0 .. NUMBER_OF_DAYS - 1``.
    """
    # day number (int): unsafe user count (int)
    day_number_to_unsafe_user_count = {}
    missed_packages, total_users, unsafe_users = set(), set(), set()
    missed_requests, total_requests = 0, 0
    prev_timestamp = None
    prev_unsafe_user_count = 0

    # newline='' lets the csv module do its own newline handling, as its
    # documentation requires for correct parsing of quoted fields.
    with open(simple_log_filename, 'rt', newline='') as simple_log_file:
        rows = csv.reader(simple_log_file)

        for timestamp, ip_address, url, _user_agent in rows:
            package_name = translation_cache.infer_package_name(url)

            if package_name in safe_packages:
                assert package_name not in unsafe_packages
            else:
                # Anything not known to be safe counts against the user.
                unsafe_users.add(ip_address)
                if package_name not in unsafe_packages:
                    # Neither safe nor unsafe: the classification missed it.
                    missed_packages.add(package_name)
                    missed_requests += 1

            total_requests += 1
            assert missed_requests <= total_requests

            # Users are only ever added, so the count cannot shrink.
            unsafe_user_count = len(unsafe_users)
            assert prev_unsafe_user_count <= unsafe_user_count
            prev_unsafe_user_count = unsafe_user_count

            total_users.add(ip_address)
            total_user_count = len(total_users)
            assert unsafe_user_count <= total_user_count

            # The log must be sorted by time for the per-day snapshot below
            # to mean "count at the end of that day".
            timestamp = int(timestamp)
            assert prev_timestamp is None or prev_timestamp <= timestamp
            prev_timestamp = timestamp

            # Later rows of the same day overwrite earlier ones, so each day
            # ends up holding the cumulative count at its last request.
            day_number = (timestamp-SINCE_TIMESTAMP) // NUMBER_OF_SECONDS_IN_A_DAY
            day_number_to_unsafe_user_count[day_number] = unsafe_user_count

    # Without this guard an empty log would surface as a bare
    # ZeroDivisionError in the percentage computation below.
    if total_requests == 0:
        raise ValueError(
            'No requests found in {}'.format(simple_log_filename))

    missed_percentage = (missed_requests/total_requests)*100
    assert missed_percentage >= 0, 'Missed {}%'.format(missed_percentage)
    assert missed_percentage < 1, 'Missed {}%'.format(missed_percentage)
    logging.info('{}% missed requests'.format(missed_percentage))
    logging.info('Missed these projects: {}'.format(sorted(missed_packages)))

    unsafe_percentage = (unsafe_user_count/total_user_count)*100
    assert unsafe_percentage >= 0
    assert unsafe_percentage <= 100
    logging.info('{:,}/{:,} ({}%) vulnerable users'.format(unsafe_user_count,
                                                           total_user_count,
                                                           unsafe_percentage))

    # We counted number of vulnerable users per day with a dictionary.
    assert len(day_number_to_unsafe_user_count) == NUMBER_OF_DAYS
    # Now we return the number of vulnerable users per day with a list.
    # Indexing by range() raises KeyError if the days seen are not exactly
    # 0 .. NUMBER_OF_DAYS - 1.
    points = [day_number_to_unsafe_user_count[j] for j in range(NUMBER_OF_DAYS)]
    return points
# Script entry point.
#
# Usage: vulnerability_counter.py LOG_CSV INPUT_JSON OUTPUT_JSON
#
# Reads the safe/unsafe package lists from INPUT_JSON (keys 'safe' and
# 'unsafe'), replays LOG_CSV through traverse_event_log, and writes the
# resulting per-day series to OUTPUT_JSON as {"points": [...]}.
if __name__ == '__main__':
    # rw for owner and group but not others
    os.umask(0o07)

    assert len(sys.argv) == 4, \
        'usage: {} LOG_CSV INPUT_JSON OUTPUT_JSON'.format(sys.argv[0])

    # Data source 1: This is where we see users querying project simple indices
    # and/or the packages themselves.
    simple_log_filename = sys.argv[1]
    assert os.path.isfile(simple_log_filename)

    # What is the input JSON filename?
    input_json_filename = sys.argv[2]
    assert os.path.isfile(input_json_filename)

    # What is the output JSON filename?
    # NOTE(review): the output file is required to exist already; confirm
    # that this is intended, since the file is opened for writing below.
    output_json_filename = sys.argv[3]
    assert os.path.isfile(output_json_filename)

    with open(input_json_filename, 'rt') as input_json_file:
        input_json = json.load(input_json_file)

    safe_packages_list = input_json['safe']
    unsafe_packages_list = input_json['unsafe']
    safe_packages_set = set(safe_packages_list)
    unsafe_packages_set = set(unsafe_packages_list)

    # Neither list may contain duplicates, and no package may be in both.
    assert len(safe_packages_set) == len(safe_packages_list)
    assert len(unsafe_packages_set) == len(unsafe_packages_list)
    assert len(safe_packages_set & unsafe_packages_set) == 0

    points = traverse_event_log(simple_log_filename, safe_packages_set,
                                unsafe_packages_set)
    output_json = {'points': points}

    with open(output_json_filename, 'wt') as output_json_file:
        json.dump(output_json, output_json_file, sort_keys=True, indent=4,
                  separators=(',', ': '))