# Q_Loop_system.py (forked from Deadsg/DQNAgent)
import numpy as np


class CustomQLearning:
    """Tabular Q-learning agent with an epsilon-greedy behaviour policy."""

    def __init__(self, state_space_size, action_space_size,
                 learning_rate=0.1, discount_factor=0.9, exploration_rate=0.2):
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.exploration_rate = exploration_rate
        # One row per state, one column per action, initialised to zero.
        self.q_table = np.zeros((state_space_size, action_space_size))

    def choose_action(self, state):
        # Epsilon-greedy: explore with probability exploration_rate,
        # otherwise exploit the best known action for this state.
        if np.random.rand() < self.exploration_rate:
            return np.random.choice(self.q_table.shape[1])
        return np.argmax(self.q_table[state, :])

    def update_q_table(self, state, action, reward, next_state):
        # Q(s, a) <- Q(s, a) + lr * (reward + gamma * max_a' Q(s', a') - Q(s, a))
        current_q = self.q_table[state, action]
        best_next_q = np.max(self.q_table[next_state, :])
        new_q = current_q + self.learning_rate * (
            reward + self.discount_factor * best_next_q - current_q)
        self.q_table[state, action] = new_q
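
# A minimal sanity check of the update rule (an added illustration, using the
# default hyperparameters above): from a zero-initialised table, one update
# moves Q(s, a) to learning_rate * reward, since max_a' Q(s', a') is 0:
# 0 + 0.1 * (5 + 0.9 * 0 - 0) = 0.5.
_check = CustomQLearning(state_space_size=2, action_space_size=2)
_check.update_q_table(state=0, action=1, reward=5, next_state=1)
assert np.isclose(_check.q_table[0, 1], 0.5)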

# Train the agent against a stand-in "environment": transitions and rewards
# are drawn at random, so the Q-table absorbs noise rather than real structure.
state_space_size = 10
action_space_size = 4
q_learning_agent = CustomQLearning(state_space_size, action_space_size)

for episode in range(1000):
    current_state = np.random.randint(state_space_size)
    for step in range(100):
        action = q_learning_agent.choose_action(current_state)
        # Random next state and reward stand in for real environment dynamics.
        next_state = np.random.randint(state_space_size)
        reward = np.random.randint(-10, 10)
        q_learning_agent.update_q_table(current_state, action, reward, next_state)
        current_state = next_state
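
# Illustrative alternative (an assumption, not part of the original script):
# the random transitions above could be swapped for an environment with real
# structure so the same loop learns something meaningful. Hypothetical sketch:
class CorridorEnv:
    """States 0..size-1; action 1 moves right, anything else moves left."""
    def __init__(self, size=10):
        self.size = size

    def step(self, state, action):
        # Reward +10 for reaching the rightmost state, -1 per step otherwise.
        next_state = min(state + 1, self.size - 1) if action == 1 else max(state - 1, 0)
        reward = 10 if next_state == self.size - 1 else -1
        return next_state, reward

# Usage sketch: next_state, reward = CorridorEnv().step(current_state, action)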

def test_agent(agent, state):
    # Note: choose_action is epsilon-greedy, so the returned action may still
    # be exploratory; np.argmax(agent.q_table[state]) gives the greedy choice.
    action = agent.choose_action(state)
    return action

test_state = 5
optimal_action = test_agent(q_learning_agent, test_state)

def print_q_table(q_table):
    print("Q-Table:")
    print(q_table)

print_q_table(q_learning_agent.q_table)
print(f"Optimal action for state {test_state}: {optimal_action}")

def respond_to_action(action):
    # Map each discrete action to a canned chat response.
    responses = {
        0: "I don't know what to say.",
        1: "Hello!",
        2: "How are you?",
        3: "Goodbye!",
        # Add more responses as needed.
    }
    return responses.get(action, "I don't understand.")
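
# Worked example of the state hash used in the chat loop below (an added
# illustration): "hi" maps to (ord('h') + ord('i')) % 10 = (104 + 105) % 10 = 9.
assert sum(ord(c) for c in "hi") % 10 == 9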

# Chatbot demo: reuse the Q-learning machinery to pick canned replies.
state_space_size = 10
action_space_size = 4
q_learning_agent = CustomQLearning(state_space_size, action_space_size)
current_state = np.random.randint(state_space_size)  # initial state for the chat loop

print("Welcome to the Q-learning Chatbot!")
print_q_table(q_learning_agent.q_table)

# Main chat loop: read user input, derive a state, act, learn, respond.
while True:
    user_input = input("You: ")
    # Typing "exit" ends the chat.
    if user_input.lower() == "exit":
        break
    # Map the input to a state in [0, state_space_size). Any encoding works;
    # here, a simple hash sums the characters' code points modulo the state count.
    next_state = sum(ord(c) for c in user_input) % state_space_size
    # No real reward signal exists here, so draw a random integer in [-10, 10).
    reward = np.random.randint(-10, 10)
    # Choose an action for the current state, then learn from the transition.
    action = q_learning_agent.choose_action(current_state)
    q_learning_agent.update_q_table(current_state, action, reward, next_state)
    # Convert the action to a reply and show it, along with the updated table.
    agent_response = respond_to_action(action)
    print("Q-Bot:", agent_response)
    print_q_table(q_learning_agent.q_table)
    # Advance to the next state.
    current_state = next_state
print(f"Q-Bot: {respond_to_action(action)}")
print_q_table(q_learning_agent.q_table)
print("Goodbye! Thanks for chatting with the Q-learning Chatbot.")