from bisect import bisect_left
from random import random, randint
import cPickle as pickle
import datetime

class UserTraces(object):
    """
    Format:
    ------
    Each trace file contains a user session. Each line represents a new
    stream, in the form:
        TIME IP PORT
    where TIME is the stream creation timestamp in seconds, normalized so
    that the first stream in the session starts at time 0, IP is the
    stream's destination address as reported by Tor, and PORT is the
    stream's destination port as reported by Tor.
    """
    @staticmethod
    def from_pickle(filename):
        with open(filename, 'rb') as f:
            return pickle.load(f)
    def __init__(self, facebookf, gmailgchatf, gcalgdocsf, websearchf,
                 ircf, bittorrentf):
        self.trace = {}
        for (key, filename) in [("facebook", facebookf),
                                ("gmailgchat", gmailgchatf),
                                ("gcalgdocs", gcalgdocsf),
                                ("websearch", websearchf),
                                ("irc", ircf),
                                ("bittorrent", bittorrentf)]:
            self.trace[key] = []
            with open(filename, 'rb') as f:
                for line in f:
                    parts = line.strip().split()
                    seconds, ip, port = float(parts[0]), parts[1], int(parts[2])
                    self.trace[key].append((seconds, ip, port))
    def save_pickle(self, filename):
        with open(filename, 'wb') as f:
            pickle.dump(self, f)

class UserModel(object):
    """
    Sessions:
    --------
    We collected session traces of approximately 20 minutes for each usage
    class. We convert these into usage over time by repeating each trace
    according to the following weekly usage schedule:

    -facebook    }
    -gmail/gchat } 6:30-7am (1 session) M-F, 6-7pm (3 sessions) M-F
    -gcal/gdocs  }
    -web search  }
    -irc           8am-5pm (27 sessions) M-F
    -bittorrent    12am-6am (18 sessions) Su & Sa
                 } gmail/gchat at 9am (1 session) Su-Sa
    -typical     } gcal/gdocs at 12pm (1 session) Su-Sa
                 } facebook at 3pm (1 session) Su-Sa
                 } web search at 6pm (2 sessions) Su-Sa
    """
    def __init__(self, usertraces, starttime, endtime):
        self.model = {}
        self.schedule = {}
        day = 86400
        # first set up the weekly schedule for each session type
        for key in ["facebook", "gmailgchat", "gcalgdocs", "websearch"]:
            self.schedule[key] = []
            trace = usertraces.trace[key]
            monmorn, monnight = 109800, 151200  # Monday 06:30 and Monday 18:00
            for i in xrange(5):  # Monday through Friday
                morning, night = monmorn + day * i, monnight + day * i
                self.schedule_session(key, trace, morning)  # one morning session
                sessionend = self.schedule_session(key, trace, night)  # first evening session
                sessionend = self.schedule_session(key, trace, sessionend)  # second, back to back
                sessionend = self.schedule_session(key, trace, sessionend)  # third, back to back
for key in ["irc"]:
self.schedule[key] = []
trace = usertraces.trace[key]
monmorn = 115200
for morning in [monmorn, monmorn+day, monmorn+day*2, monmorn+day*3, monmorn+day*4]:
sessionend = self.schedule_session(key, trace, morning)
for i in xrange(26):
sessionend = self.schedule_session(key, trace, sessionend)
for key in ["bittorrent"]:
self.schedule[key] = []
trace = usertraces.trace[key]
sunmorn = 0
for morning in [sunmorn, sunmorn+day*6]:
sessionend = self.schedule_session(key, trace, morning)
for i in xrange(17):
sessionend = self.schedule_session(key, trace, sessionend)
        # construct new model of typical usage
        self.schedule["typical"] = []
        t1, t2, t3, t4 = 32400, 43200, 54000, 64800  # Sunday 9am, 12pm, 3pm, 6pm
        for numdays in [0, 1, 2, 3, 4, 5, 6]:  # Sunday through Saturday
            self.schedule_session("typical", usertraces.trace["gmailgchat"], t1 + day * numdays)
            self.schedule_session("typical", usertraces.trace["gcalgdocs"], t2 + day * numdays)
            self.schedule_session("typical", usertraces.trace["facebook"], t3 + day * numdays)
            # now 2 consecutive web search sessions
            sessionend = self.schedule_session("typical", usertraces.trace["websearch"], t4 + day * numdays)
            self.schedule_session("typical", usertraces.trace["websearch"], sessionend)
        self.schedule["typical"].sort(key=lambda x: x[0])
        # best/worst case models use the same stream times as "typical" but
        # fix the destination port
        self.schedule["best"] = []   # smart sarah: port 443 (HTTPS), allowed by most exit policies
        self.schedule["worst"] = []  # dumb dan: port 6523 (gobby, a free collaborative text editor), allowed by few
        for (seconds, ip, port) in self.schedule["typical"]:
            self.schedule["best"].append((seconds, ip, 443))
            self.schedule["worst"].append((seconds, ip, 6523))
        # then build the model during the requested interval
        startd = datetime.datetime.fromtimestamp(starttime)
        # the schedule counts seconds from Sunday 00:00 (see sunmorn above),
        # while Python's weekday() numbers Monday as 0, so shift by one day
        dayofweek = (startd.weekday() + 1) % 7
        offset = dayofweek * 86400 + startd.hour * 3600 + startd.minute * 60 + startd.second
        for key in self.schedule:
            self.model[key] = []
            currenttime = 0
            week = 0
            while currenttime < endtime:
                for (seconds, ip, port) in self.schedule[key]:
                    seconds = seconds + week * 604800
                    if currenttime < offset and seconds < offset:
                        continue  # skip events scheduled before the start time
                    currenttime = seconds - offset + starttime
                    if currenttime >= endtime:
                        break
                    if port != 0:
                        self.model[key].append({'time': currenttime,
                                                'type': 'connect', 'ip': ip, 'port': port})
                    else:
                        # a port of 0 marks a DNS resolve rather than a connect
                        self.model[key].append({'time': currenttime,
                                                'type': 'resolve', 'ip': ip, 'port': port})
                week += 1
    def schedule_session(self, key, trace, sessionstart):
        '''schedule one session of trace starting at sessionstart and
        return the time at which the session ends'''
        s = sessionstart
        for (seconds, ip, port) in trace:
            s = sessionstart + seconds
            self.schedule[key].append((s, ip, port))
        # assume the session occupied at least 20 minutes
        if s < sessionstart + 20 * 60:
            s = sessionstart + 20 * 60
        return s
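    # Example of the bookkeeping above (hypothetical values): a trace
    # [(0.0, ip, 80), (30.0, ip, 80)] scheduled at sessionstart=1000
    # appends events at t=1000 and t=1030, then returns 2200, since the
    # session is assumed to occupy at least 20 minutes.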
    def get_streams(self, session):
        '''return the list of stream events for the given session type; each
        is a dict with keys 'time', 'type' ('connect' or 'resolve'), 'ip',
        and 'port'.'''
        return self.model[session]

class Relay(object):
    def __init__(self, name, isexit, isguard, weight):
        self.name = name
        self.isexit = isexit
        self.isguard = isguard
        self.weight = weight
        self.congestion = []  # observed congestion values for this relay

class CongestionProfile(object):
    """
    Builds a histogram of a relay's observed congestion values and supports
    sampling new congestion values from the resulting distribution.
    """
    def __init__(self, relay):
        self.name = relay.name
        self.isexit = relay.isexit
        self.isguard = relay.isguard
        self.weight = relay.weight
        # create 100 bins spanning the congestion range; values are scaled
        # by 1000 and truncated so that range() gets the integer arguments
        # it requires, and the range is kept wide enough for non-empty bins
        self.lenc = len(relay.congestion)
        self.minc = int(1000 * min(relay.congestion))
        self.maxc = max(int(1000 * max(relay.congestion)), self.minc + 100)
        self.binsize = int((self.maxc - self.minc) / 100.0)
        self.breakpoints = range(self.minc, self.maxc, self.binsize)
        self.bins = [0] * len(self.breakpoints)
        self.cumul, self.total = [], 0.0
        # use congestion values to count the weight of each bin
        for c in relay.congestion:
            i = bisect_left(self.breakpoints, c * 1000.0)
            if i >= len(self.bins):
                i = len(self.bins) - 1
            self.bins[i] += 1
        # make a CDF of the bin weights
        for w in self.bins:
            self.total += w
            self.cumul.append(self.total)
def get_congestion(self):
'''returns milliseconds of congestion'''
# probabilistically choose a bin by sampling the bin weights CDF
x = random() * self.total
i = bisect_left(self.cumul, x)
# draw a uniform value from its range
low = self.breakpoints[i]
high = low + self.binsize
return randint(low, high) / 1000.0

class CongestionModel(object):
    """
    Assigns each relay the loaded CongestionProfile whose consensus weight
    and exit/guard flags most closely match, and samples congestion from it.
    """
    def __init__(self, tracefilename):
        self.assigned = {}
        self.profiles = {}
        if tracefilename is None:
            return
        try:
            with open(tracefilename, 'rb') as inf:
                relays = pickle.load(inf)
            for name in relays:
                self.profiles[name] = CongestionProfile(relays[name])
        except Exception:
            # leave the model empty if the trace file is missing or unreadable
            self.profiles = {}
    def find_match(self, weight, isexit=False, isguard=False):
        '''
        Gets the relay profile with a consensus weight closest to the given
        weight, taking into consideration the exit and guard flags.
        Returns the match or None. (We currently have no guard-only profiles.)
        '''
        match, dist = None, None
        for name in self.profiles:
            r = self.profiles[name]
            if isexit != r.isexit or isguard != r.isguard:
                continue
            d = abs(weight - r.weight)
            if match is None or d < dist:
                match = r
                dist = d
        return match
    def get_congestion(self, name, weight, isexit=False, isguard=False):
        # we have no guard-only profiles, so match guard-only relays
        # against the guard+exit profiles instead
        if isguard and not isexit:
            isexit = True
        if name not in self.assigned:
            self.assigned[name] = self.find_match(weight, isexit, isguard)
        if self.assigned[name] is None:
            return 0.0  # no profile matched; model no congestion
        return self.assigned[name].get_congestion()
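    # Hypothetical usage sketch (the file name and relay values are
    # illustrative, not part of the original interface):
    #   cm = CongestionModel("congestion.pickle")
    #   delay = cm.get_congestion("relay1", weight=5000, isexit=True)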

class PropagationDelayModel(object):
    """
    Placeholder for a model of propagation delay between two addresses.
    Loading a delay trace is not yet implemented.
    """
    def __init__(self, tracefilename):
        # no trace format is defined yet, so nothing is loaded
        self.tracefilename = tracefilename
    def get_prop_delay(self, ip1, ip2):
        # return prop delay between ip1 and ip2, based on the model
        # (currently a constant placeholder)
        return 100
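
# Minimal usage sketch, not part of the original module: the trace file
# names and the timestamp below are hypothetical.
if __name__ == '__main__':
    traces = UserTraces("facebook.log", "gmailgchat.log", "gcalgdocs.log",
                        "websearch.log", "irc.log", "bittorrent.log")
    traces.save_pickle("usertraces.pickle")
    # model one simulated day of "typical" usage
    start = 1330560000  # an arbitrary epoch start time
    model = UserModel(traces, start, start + 86400)
    for stream in model.get_streams("typical"):
        print stream['time'], stream['type'], stream['ip'], stream['port']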