-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest_ai.py
56 lines (40 loc) · 1.22 KB
/
test_ai.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import numpy
from ai import AI
from game import EMPTY, X, O
class TestAI:
    """Unit tests for the AI agent's Q-learning behaviour."""

    def test_choose_action(self):
        """choose_action should pick the move with the highest Q-value."""
        grid = [
            [O, O, X],
            [O, X, EMPTY],
            [X, X, EMPTY]
        ]
        # State key: board flattened row-major into a hashable tuple.
        state = tuple(numpy.ravel(grid))

        agent = AI()
        agent.q[state, (1, 2)] = -0.8
        agent.q[state, (2, 2)] = 0.8

        assert agent.choose_action(grid) == (2, 2)

    def test_update_q(self):
        """update_q should apply the Q-learning rule:
        Q(s,a) <- Q(s,a) + alpha * (reward + best future reward - Q(s,a)).
        """
        old_grid = [
            [O, O, EMPTY],
            [O, X, EMPTY],
            [X, X, EMPTY]
        ]
        new_grid = [
            [O, O, X],
            [O, X, EMPTY],
            [X, X, EMPTY]
        ]
        move = (0, 2)
        old_state = tuple(numpy.ravel(old_grid))
        new_state = tuple(numpy.ravel(new_grid))

        agent = AI()
        old_value = 0.4
        agent.q[old_state, move] = old_value
        # Two successor entries; the larger one is the best future reward.
        best_future = 0.8
        agent.q[new_state, (1, 2)] = -0.8
        agent.q[new_state, (2, 2)] = best_future

        reward = 0.6
        agent.update_q(old_grid, move, new_grid, reward)

        expected = old_value + agent.alpha * (reward + best_future - old_value)
        assert agent.q.get((old_state, move)) == expected