-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.py
139 lines (128 loc) · 5.03 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import numpy as np
import os
# Keys of 'rendered.npz' whose per-tick value is compared against 0 by
# actions_from_file (a non-zero value means the action was taken that tick).
MOVING_ACTIONS = [
    'action$forward',
    'action$left',
    'action$back',
    'action$right',
    'action$jump',
    'action$sneak',
    'action$sprint',
    'action$attack',
]

# Keys of 'rendered.npz' whose per-tick value is rendered as "<name>: <value>".
# Except for the camera, the value 'none' means "not used this tick".
OTHER_ACTIONS = [
    'action$camera',
    'action$place',
    'action$equip',
    'action$craft',
    'action$nearbyCraft',
    'action$nearbySmelt',
]

# Root directory of the downloaded data; the MINERL_DATA_ROOT environment
# variable overrides the hard-coded fallback path.
MINERL_DATA_ROOT = os.environ.get('MINERL_DATA_ROOT', 'D:\\MineRL data 2020')

# Ascending reward totals. All entries but the last are exactly the values
# max_rewards can return; 10000 presumably serves as an upper sentinel --
# confirm against the callers.
CUMULATIVE_REWARDS = [
    0, 1, 3, 7, 11, 19, 35, 67, 99, 131, 163, 291, 547, 1571, 10000,
]
def actions_from_file(data_set, trajectory):
    """
    Takes a trajectory path and returns all of its actions as a list of tuples of strings.
    An example of an action: ('camera: [0. 0.]', 'nearbyCraft: iron_pickaxe').
    Each tuple lists the active MOVING_ACTIONS first (by bare name), followed by the
    OTHER_ACTIONS formatted as '<name>: <value>'. The camera entry is always present;
    the remaining OTHER_ACTIONS are included only when their value is not 'none'.
    :param data_set: data set name (for example: 'MineRLObtainDiamond-v0')
    :param trajectory: trajectory path
    :return: list of actions, one tuple per tick (as many ticks as there are rewards)
    :raises KeyError: if 'rendered.npz' lacks 'reward' or one of the action arrays
    """
    doc = os.path.join(MINERL_DATA_ROOT, data_set, trajectory, 'rendered.npz')
    # An NpzFile reads a member from the archive on *every* f[key] access and
    # keeps the file open until closed. Pull each array out exactly once and
    # close the archive before the per-tick loop.
    with np.load(doc) as f:
        n_ticks = len(f['reward'])
        # act[7:] drops the 7-character 'action$' prefix.
        moving = [(act[7:], f[act]) for act in MOVING_ACTIONS]
        other = [(act, act[7:], f[act]) for act in OTHER_ACTIONS]

    actions = []
    for i in range(n_ticks):
        tick_acts = [name for name, values in moving if values[i] != 0]
        for act, name, values in other:
            # The camera check comes first so the array-valued camera entry is
            # never compared against the string 'none'.
            if act == 'action$camera' or values[i] != 'none':
                tick_acts.append(f'{name}: {values[i]}')
        actions.append(tuple(tick_acts))
    return actions
def deobfuscate_kmeans_actions(kmeans, action_mapping_dict):
    """
    For each centroid of given KMeans, finds the nearest obfuscated action in the mapping as measured by MSE.
    The matched action is reordered so that its first entry containing 'camera' (if any) comes first;
    the relative order of the other entries is kept.
    :param kmeans: KMeans object (only its cluster_centers_ attribute is read)
    :param action_mapping_dict: mapping from non-obfuscated to obfuscated actions
    :return: two parallel lists, in this order: the square roots of the best MSEs (one per centroid)
             and the corresponding deobfuscated actions as tuples
    :raises ValueError: if no action in the mapping can be matched to a centroid
                        (for example because the mapping is empty)
    """
    best_mses = []
    best_actions = []
    for centroid in kmeans.cluster_centers_:
        # Start from infinity rather than a fixed threshold so that a centroid
        # far away from every known action still gets its nearest match.
        best_mse = float('inf')
        best_action = None
        for act, obfuscated in action_mapping_dict.items():
            mse = ((centroid - obfuscated) ** 2).mean()
            # Strict comparison: on ties the first action in the mapping wins.
            if mse < best_mse:
                best_mse = mse
                best_action = act
        if best_action is None:
            raise ValueError('no action in action_mapping_dict could be matched to a centroid')

        # Move the first camera entry to the front and stop there; the result
        # is always a tuple, whether or not a camera entry was found.
        parts = list(best_action)
        for i, part in enumerate(parts):
            if 'camera' in part:
                parts.insert(0, parts.pop(i))
                break

        best_mses.append(np.sqrt(best_mse))
        best_actions.append(tuple(parts))
    return best_mses, best_actions
def max_rewards(actions):
    """
    Computes the highest reward reachable with the given mini actions.
    The stages are checked in a fixed order and the function returns at the first stage
    whose required mini actions are missing, so later stages never count without the earlier ones.
    :param actions: a set of mini actions such as 'attack', 'craft: planks'
    :return: maximum possible rewards with given mini actions
    """
    def has(*names):
        # True only when every listed mini action is available.
        return all(name in actions for name in names)

    if not has('attack'):
        return 0
    if not has('craft: planks'):
        return 1

    # Sticks and crafting table: both are needed to go on, one alone is worth 7.
    stick = has('craft: stick')
    table = has('craft: crafting_table')
    if not (stick and table):
        return 7 if (stick or table) else 3

    if not has('place: crafting_table', 'nearbyCraft: wooden_pickaxe'):
        return 11
    if not has('equip: wooden_pickaxe'):
        return 19
    if not has('nearbyCraft: stone_pickaxe'):
        return 35

    # Furnace and equipped stone pickaxe: both are needed to go on,
    # each alone gives its own intermediate total.
    furnace = has('nearbyCraft: furnace')
    stone_equipped = has('equip: stone_pickaxe')
    if not (furnace and stone_equipped):
        if furnace:
            return 99
        if stone_equipped:
            return 131
        return 67

    if not has('place: furnace', 'nearbySmelt: iron_ingot'):
        return 163
    if not has('nearbyCraft: iron_pickaxe'):
        return 291
    if not has('equip: iron_pickaxe'):
        return 547
    return 1571
def camera_stats(path, file):
    """
    Gets all vertical camera action angles (positive angle means down) from a given KMeans experiment.
    The first line of the file is skipped. On every other non-blank line, everything after the first
    comma is parsed as a Python literal whose first element is a string such as 'camera: [0. 0.]';
    the first number inside the brackets is collected.
    :param path: path to the experiments
    :param file: filename of the experiment
    :return: a list of vertical camera action angles
    """
    # Imported here so the function does not rely on a module-level import.
    import ast

    updown = []
    with open(os.path.join(path, file)) as txt:
        txt.readline()  # skip the first line
        for line in txt:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing one at the end of the file).
                continue
            # NOTE: this used to call eval() on the file contents. The action is a
            # plain literal, so ast.literal_eval parses it without executing
            # arbitrary code from the experiment file.
            action = ast.literal_eval(line[line.index(',') + 1:])
            camera = action[0]
            # Take the text between '[' and the final character (the closing ']').
            camera_action = [float(value) for value in camera[camera.index('[') + 1:-1].split()]
            updown.append(camera_action[0])
    return updown