From c2778fcd112045dc1413656d3f9de7f61260072e Mon Sep 17 00:00:00 2001 From: shugo256 Date: Tue, 20 Jul 2021 23:57:06 +0900 Subject: [PATCH 1/4] merge reward code --- src/io/reward.py | 36 ++++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/src/io/reward.py b/src/io/reward.py index 92baff9..f1a7b43 100644 --- a/src/io/reward.py +++ b/src/io/reward.py @@ -8,24 +8,40 @@ class Reward(NamedTuple): """報酬を表すタプルオブジェクト """ - LAMBDA1 = 0.2 - LAMBDA2 = 0.1 - LAMBDA3 = 0.1 - T_MAX = 30 - T_MIN = 20 - T_TARGET = 25 metric1: float @classmethod def calc_metrix1(cls, state: BuildingState) -> float: + LAMBDA1 = 0.2 + LAMBDA2 = 0.1 + LAMBDA3 = 0.1 + LAMBDA4 = 20 + T_MAX = 30 + T_MIN = 20 + T_TARGET = 25 + area_temp = np.array([area.temperature for area in state.areas]) - reward = np.exp(-cls.LAMBDA1 * (area_temp - cls.T_TARGET) ** 2).sum() - reward += - cls.LAMBDA2 * (np.where((cls.T_MIN - area_temp) < 0, 0, (cls.T_MIN - area_temp)).sum()) - reward += - cls.LAMBDA2 * (np.where((area_temp - cls.T_MAX) < 0, 0, (area_temp - cls.T_MAX)).sum()) - reward += - cls.LAMBDA3 * state.electric_price_unit + reward = np.exp(-LAMBDA1 * (area_temp - T_TARGET) ** 2).sum() + reward += - LAMBDA2 * (np.where((T_MIN - area_temp) < 0, 0, (T_MIN - area_temp)).sum()) + reward += - LAMBDA2 * (np.where((area_temp - T_MAX) < 0, 0, (area_temp - T_MAX)).sum()) + reward += - LAMBDA3 * state.electric_price_unit + # reward += LAMBDA4 * state.charge_ratio + + ''' + print(np.exp(-lambda1 * (temp - T_target) ** 2).sum()) + print(-lambda2 * (np.where((T_min - temp) < 0, 0, (T_min - temp)).sum())) + print(-lambda2 * (np.where((temp - T_max) < 0, 0, (temp - T_max)).sum())) + print(-lambda3 * electric_price_unit) + print(lambda4*charge_ratio) + ''' + + # 人数の項を考える + # 太陽光の発電状況 + # 蓄電池の残量 + # 異なるrewardを考えた状況設定 return reward From a520d069dc64de204c5a0fef15ea5f026b0863f4 Mon Sep 17 00:00:00 2001 From: shugo256 Date: Tue, 20 Jul 2021 23:57:55 +0900 Subject: [PATCH 2/4] add mode to ES.__repr__ --- src/facility/electric_storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/facility/electric_storage.py b/src/facility/electric_storage.py index 334a66e..757e4bc 100644 --- a/src/facility/electric_storage.py +++ b/src/facility/electric_storage.py @@ -75,4 +75,4 @@ def from_xml_element(cls: Type[T], elem: Element) -> T: def __repr__(self) -> str: - return f"ES(charge_ratio={self.charge_ratio:.3f})" + return f"ES(charge_ratio={self.charge_ratio:.3f}, mode={self.mode})" From 8b9348a3e5a3babeeb6572e9007d43e6b7e6f2bd Mon Sep 17 00:00:00 2001 From: shugo256 Date: Tue, 20 Jul 2021 23:58:09 +0900 Subject: [PATCH 3/4] add BFS.print_cur_state --- src/bfs.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/bfs.py b/src/bfs.py index 1256a09..8f3cfdd 100644 --- a/src/bfs.py +++ b/src/bfs.py @@ -11,6 +11,8 @@ class BuildingFacilitySimulator: TODO: AI側からアクセスするときのメソッドを用意する(値取得、設定変更など) """ + steps: int = 0 + last_state: BuildingState areas: list[Area] = [] ext_envs: list[ExternalEnvironment] = [] area_envs: dict[int, list[AreaEnvironment]] = {} @@ -63,7 +65,20 @@ def step(self, action: BuildingAction) -> tuple[BuildingState, Reward]: state = BuildingState.create(area_states, ext_env.electric_price_unit) + self.steps = t + self.last_state = state + yield ( state, Reward.from_state(state) ) + + + def print_cur_state(self): + print(f"\niteration {self.steps}") + print(self.ext_envs[self.steps]) + + for aid, (area, st) in enumerate(zip(self.areas, self.last_state.areas)): + print(f"area {aid}: temp={area.temperature:.2f}, power={st.power_consumption:.2f}, {area.facilities[0]}") + + print(f"total power consumption: {self.last_state.power_balance:.2f}") From 08f4fddbe6ff1de66e80879ad722e628c4014408 Mon Sep 17 00:00:00 2001 From: shugo256 Date: Tue, 20 Jul 2021 23:58:31 +0900 Subject: [PATCH 4/4] adjust main_rl.py to the new simulator --- main_rl.py | 77 ++++++++++++++++++------------------------------------ 1 file changed, 26 insertions(+), 51 deletions(-) diff --git a/main_rl.py b/main_rl.py index 955a976..0ca7877 100644 --- a/main_rl.py +++ b/main_rl.py @@ -23,26 +23,24 @@ action_shape = (4,) # 各HVACの制御(3つ) + Electric Storageの制御(1つ) -def get_reward(temp, electric_price_unit, charge_ratio): - R = np.exp(-lambda1 * (temp - T_target) ** 2).sum() - - R += - lambda2 * (np.where((T_min - temp) < 0, 0, (T_min - temp)).sum()) - R += - lambda2 * (np.where((temp - T_max) < 0, 0, (temp - T_max)).sum()) - R += - lambda3 * electric_price_unit - #R += lambda4 * charge_ratio - - ''' - print(np.exp(-lambda1 * (temp - T_target) ** 2).sum()) - print(-lambda2 * (np.where((T_min - temp) < 0, 0, (T_min - temp)).sum())) - print(-lambda2 * (np.where((temp - T_max) < 0, 0, (temp - T_max)).sum())) - print(-lambda3 * electric_price_unit) - print(lambda4*charge_ratio) - ''' -# 人数の項を考える -# 太陽光の発電状況 -# 蓄電池の残量 -# 異なるrewardを考えた状況設定 - return R +def cvt_state_to_ndarray(state): + state_arr = [] + for area_id, area_state in enumerate(state.areas): + # 状態を獲得 + state_arr.extend([ + area_state.people, + area_state.temperature, + area_state.power_consumption + ]) + + if area_id == 4: + state_arr.append(area_state.facilities[0].charge_ratio) + + price = state.electric_price_unit + + state_arr.append(price) + + return np.array(state_arr) def action_to_temp(action): @@ -63,11 +61,6 @@ def action_to_ES(action): return mode -def print_area(area_id: str, area: Area, area_state: AreaState): - print( - f"area {area_id}: temp={area.temperature:.2f}, power={area_state.power_consumption:.2f}, {area.facilities[0]}") - - if __name__ == "__main__": #writer = SummaryWriter(log_dir="./logs") bfs = BuildingFacilitySimulator("BFS_environment.xml") @@ -90,38 +83,19 @@ def print_area(area_id: str, area: Area, area_state: AreaState): reward = np.zeros(1) temp = np.zeros(3) charge_ratio = 0 - for i, (building_state, reward) in enumerate(bfs.step(action)): - sleep(0.1) - print(f"\niteration {i}") - print(bfs.ext_envs[i]) - next_state = [] - for area_id, area in enumerate(bfs.areas): - - print_area(area_id, area, building_state.area_states[area_id]) - # 状態を獲得 - - people = building_state.area_states[area_id].people - temperature = building_state.area_states[area_id].temperature - power = building_state.area_states[area_id].power_consumption - each_state = np.array([people, temperature, power]) - next_state.extend(each_state) - - if area_id == 4: - charge_ratio = area.facilities[0].charge_ratio - next_state.append(area.facilities[0].charge_ratio) - price = bfs.ext_envs[i].electric_price_unit - - next_state.append(price) - next_state = np.array(next_state) - reward = get_reward(temp, price, charge_ratio) + for i, (state_obj, reward_obj) in enumerate(bfs.step(action)): + next_state = cvt_state_to_ndarray(state_obj) + reward = reward_obj.metric1 if i >= 1: Agent.replay_buffer.add( state, action_, next_state, reward, done=False) state = next_state + if i == 0: continue + if i >= 100: action_, _ = Agent.choose_action(state) else: @@ -154,5 +128,6 @@ def print_area(area_id: str, area: Area, area_state: AreaState): writer.add_scalar('charge_ratio', area.facilities[0].charge_ratio, i) ''' Agent.update() - print( - f"total power consumption: {building_state.power_balance:.2f} charge_mode: {mode}") + + if i % 60 == 0: + bfs.print_cur_state()