-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathai.py
49 lines (35 loc) · 1.42 KB
/
ai.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import numpy
import sys
from game import get_available_actions
class AI:
    """Tabular Q-learning agent for a small board game.

    State is the flattened board (stored as a tuple so it is hashable);
    the Q-table maps (state, action) pairs to learned values.  Action
    selection is purely greedy over the current Q-table.
    """

    def __init__(self):
        # Q-table: (tuple(flattened board), action) -> learned value.
        self.q = dict()
        # Learning rate for the Q-update.
        self.alpha = 0.5

    def choose_action(self, board):
        """Return the available action with the highest Q-value for *board*.

        Greedy policy: ties are broken in favor of the first action yielded
        by get_available_actions.  Assumes at least one action is available.
        """
        available_actions = list(get_available_actions(board))
        board_flat = list(numpy.array(board).flat)
        best_q = -sys.maxsize - 1
        chosen_action = available_actions[0]  # fallback if all Q-values tie at the floor
        for action in available_actions:
            q_val = self.get_q(board_flat, action)
            if q_val > best_q:
                best_q = q_val
                chosen_action = action
        return chosen_action

    def update_q(self, board, action, new_board, reward):
        """Apply one Q-learning update for taking *action* in *board*.

        Q(s,a) <- Q(s,a) + alpha * (reward + max_a' Q(s',a') - Q(s,a))
        where s' is *new_board*.  (No separate discount factor: gamma = 1.)
        """
        board_flat = list(numpy.array(board).flat)
        q = self.get_q(board_flat, action)
        available_actions = get_available_actions(new_board)
        new_board_flat = list(numpy.array(new_board).flat)
        future_reward = self.get_best_reward(available_actions, new_board_flat)
        self.q[tuple(board_flat), action] = q + self.alpha * \
            (reward + future_reward - q)

    def get_q(self, board_flat, action):
        """Return the stored Q-value for (board, action), defaulting to 0."""
        # dict.get with a default instead of the original `... or 0`, which
        # relied on falsiness rather than key absence.
        return self.q.get((tuple(board_flat), action), 0)

    def get_best_reward(self, available_actions, board_flat):
        """Return the maximum Q-value over *available_actions* for *board_flat*.

        BUG FIX: the original returned -sys.maxsize - 1 when no actions were
        available (terminal board), which corrupted update_q for game-ending
        moves.  A terminal state has no future reward, so the correct value
        is 0 — supplied here via max(..., default=0).
        """
        return max(
            (self.get_q(board_flat, action) for action in available_actions),
            default=0,
        )