diff --git a/opfgym/envs/eco_dispatch.py b/opfgym/envs/eco_dispatch.py index ebe3a2b..440d491 100644 --- a/opfgym/envs/eco_dispatch.py +++ b/opfgym/envs/eco_dispatch.py @@ -54,7 +54,8 @@ def __init__(self, simbench_network_name='1-HV-urban--0-sw', act_keys = [('sgen', 'p_mw', net.sgen.index[net.sgen.controllable]), ('gen', 'p_mw', net.gen.index[net.gen.controllable])] - super().__init__(net, act_keys, obs_keys, profiles, *args, **kwargs) + super().__init__(net, act_keys, obs_keys, profiles=profiles, + *args, **kwargs) def _define_opf(self, simbench_network_name, *args, **kwargs): net, profiles = build_simbench_net( diff --git a/opfgym/envs/load_shedding.py b/opfgym/envs/load_shedding.py index d35b403..3753dce 100644 --- a/opfgym/envs/load_shedding.py +++ b/opfgym/envs/load_shedding.py @@ -56,7 +56,8 @@ def __init__(self, simbench_network_name='1-MV-comm--2-sw', act_keys = [('load', 'p_mw', net.load.index[net.load.controllable]), ('storage', 'p_mw', net.storage.index[net.storage.controllable])] - super().__init__(net, act_keys, obs_keys, profiles, *args, **kwargs) + super().__init__(net, act_keys, obs_keys, profiles=profiles, + *args, **kwargs) def _define_opf(self, simbench_network_name, *args, **kwargs): net, profiles = build_simbench_net( diff --git a/opfgym/envs/max_renewable.py b/opfgym/envs/max_renewable.py index 7ee9bac..6df6488 100644 --- a/opfgym/envs/max_renewable.py +++ b/opfgym/envs/max_renewable.py @@ -48,7 +48,8 @@ def __init__(self, simbench_network_name='1-HV-mixed--1-sw', ('storage', 'p_mw', net.storage.index[net.storage.controllable]) ] - super().__init__(net, act_keys, obs_keys, profiles, *args, **kwargs) + super().__init__(net, act_keys, obs_keys, profiles=profiles, + *args, **kwargs) def _define_opf(self, simbench_network_name, *args, **kwargs): net, profiles = build_simbench_net( diff --git a/opfgym/envs/voltage_control.py b/opfgym/envs/voltage_control.py index 50eb6cf..033005e 100644 --- a/opfgym/envs/voltage_control.py +++ b/opfgym/envs/voltage_control.py @@ -56,7 +56,8 @@ def __init__(self, simbench_network_name='1-MV-semiurb--1-sw', act_keys = [('sgen', 'q_mvar', net.sgen.index[net.sgen.controllable]), ('storage', 'q_mvar', net.storage.index[net.storage.controllable])] - super().__init__(net, act_keys, obs_keys, profiles, *args, **kwargs) + super().__init__(net, act_keys, obs_keys, profiles=profiles, + *args, **kwargs) def _define_opf(self, simbench_network_name, *args, **kwargs): net, profiles = build_simbench_net( diff --git a/opfgym/examples/custom_constraint.py b/opfgym/examples/custom_constraint.py index a5cd567..c7d7b12 100644 --- a/opfgym/examples/custom_constraint.py +++ b/opfgym/examples/custom_constraint.py @@ -45,7 +45,7 @@ def __init__(self, simbench_network_name='1-LV-urban6--0-sw', **constraint_kwargs) constraints_list.append(s_mva_constraint) - super().__init__(net, act_keys, obs_keys, profiles, + super().__init__(net, act_keys, obs_keys, profiles=profiles, optimal_power_flow_solver=False, constraints=constraints_list, *args, **kwargs) diff --git a/opfgym/examples/mixed_continuous_discrete.py b/opfgym/examples/mixed_continuous_discrete.py index 1630083..6009672 100644 --- a/opfgym/examples/mixed_continuous_discrete.py +++ b/opfgym/examples/mixed_continuous_discrete.py @@ -40,7 +40,7 @@ def __init__(self, simbench_network_name='1-LV-urban6--0-sw', act_keys = [('sgen', 'q_mvar', net.sgen.index), ('trafo', 'tap_pos', net.trafo.index)] - super().__init__(net, act_keys, obs_keys, profiles, + super().__init__(net, act_keys, obs_keys, profiles=profiles, objective_function=custom_objective_function, optimal_power_flow_solver=False, *args, **kwargs) diff --git a/opfgym/examples/multi_stage.py b/opfgym/examples/multi_stage.py index ea4b990..34bd004 100644 --- a/opfgym/examples/multi_stage.py +++ b/opfgym/examples/multi_stage.py @@ -39,7 +39,7 @@ def __init__(self, simbench_network_name='1-LV-urban6--0-sw', # Control all generators in the system act_keys = [('sgen', 'p_mw', net.sgen.index)] - super().__init__(net, act_keys, obs_keys, profiles, + super().__init__(net, act_keys, obs_keys, profiles=profiles, steps_per_episode=steps_per_episode, optimal_power_flow_solver=False, *args, **kwargs) diff --git a/opfgym/examples/network_reconfiguration.py b/opfgym/examples/network_reconfiguration.py index 80ee4d1..01a3b20 100644 --- a/opfgym/examples/network_reconfiguration.py +++ b/opfgym/examples/network_reconfiguration.py @@ -34,7 +34,7 @@ def __init__(self, simbench_network_name='1-HV-urban--0-sw', act_keys = [('switch', 'closed', net.switch.index[net.switch.controllable]), ('trafo', 'tap_pos', net.trafo.index[net.trafo.controllable])] - super().__init__(net, act_keys, obs_keys, profiles, + super().__init__(net, act_keys, obs_keys, profiles=profiles, optimal_power_flow_solver=False, *args, **kwargs) def _define_opf(self, simbench_network_name, *args, **kwargs): diff --git a/opfgym/examples/partial_obs.py b/opfgym/examples/partial_obs.py index 46a3eec..677f00c 100644 --- a/opfgym/examples/partial_obs.py +++ b/opfgym/examples/partial_obs.py @@ -28,10 +28,18 @@ def __init__(self, simbench_network_name='1-LV-rural1--0-sw', ('load', 'q_mvar', observable_loads), ] + # Since state and observation are not equal for this env, we need to + # define the state space explicitly (e.g. required for sampling) + state_keys = [ + ('load', 'p_mw', net.load.index), + ('load', 'q_mvar', net.load.index) + ] + # ... and control some selected switches in the system act_keys = [('sgen', 'p_mw', net.sgen.index)] - super().__init__(net, act_keys, obs_keys, profiles, *args, **kwargs) + super().__init__(net, act_keys, obs_keys, state_keys=state_keys, + profiles=profiles, *args, **kwargs) def _define_opf(self, simbench_network_name, *args, **kwargs): net, profiles = build_simbench_net( diff --git a/opfgym/examples/pure_constraint_satisfaction.py b/opfgym/examples/pure_constraint_satisfaction.py index 537f671..87dbc00 100644 --- a/opfgym/examples/pure_constraint_satisfaction.py +++ b/opfgym/examples/pure_constraint_satisfaction.py @@ -21,7 +21,7 @@ def __init__(self, **kwargs): # ... and control some selected switches in the system act_keys = [('sgen', 'p_mw', net.sgen.index)] - super().__init__(net, act_keys, obs_keys, profiles, **kwargs) + super().__init__(net, act_keys, obs_keys, profiles=profiles, **kwargs) def _define_opf(self): net, profiles = build_simbench_net('1-LV-rural1--0-sw') diff --git a/opfgym/examples/security_constrained.py b/opfgym/examples/security_constrained.py index 4359a8d..7f23202 100644 --- a/opfgym/examples/security_constrained.py +++ b/opfgym/examples/security_constrained.py @@ -27,7 +27,7 @@ def __init__(self, simbench_network_name='1-HV-urban--0-sw', # ... and control some selected switches in the system act_keys = [('sgen', 'p_mw', net.sgen.index)] - super().__init__(net, act_keys, obs_keys, profiles, + super().__init__(net, act_keys, obs_keys, profiles=profiles, optimal_power_flow_solver=False, *args, **kwargs) def _define_opf(self, simbench_network_name, *args, **kwargs): diff --git a/opfgym/opf_env.py b/opfgym/opf_env.py index 55dba99..17411b9 100644 --- a/opfgym/opf_env.py +++ b/opfgym/opf_env.py @@ -28,6 +28,7 @@ def __init__(self, net: pp.pandapowerNet, action_keys: tuple[tuple[str, str, np.ndarray], ...], observation_keys: tuple[tuple[str, str, np.ndarray], ...], + state_keys: tuple[tuple[str, str, np.ndarray], ...]=None, profiles: dict[str, pd.DataFrame]=None, evaluate_on: str='validation', steps_per_episode: int=1, @@ -56,6 +57,7 @@ def __init__(self, self.net = net self.obs_keys = observation_keys + self.state_keys = state_keys or copy.copy(observation_keys) self.act_keys = action_keys self.profiles = profiles @@ -117,11 +119,13 @@ def __init__(self, self.add_mean_obs = add_mean_obs - # Define observation and action space + # Define observation, state, and action spaces self.bus_wise_obs = bus_wise_obs - self.observation_space = get_obs_space( + self.observation_space = get_obs_and_state_space( self.net, self.obs_keys, add_time_obs, add_mean_obs, - seed, bus_wise_obs=bus_wise_obs) + seed=seed, bus_wise_obs=bus_wise_obs) + self.state_space = get_obs_and_state_space( + self.net, self.state_keys, seed=seed) n_actions = sum([len(idxs) for _, _, idxs in self.act_keys]) self.action_space = gym.spaces.Box(0, 1, shape=(n_actions,), seed=seed) @@ -206,7 +210,9 @@ def reset(self, seed=None, options=None) -> tuple: self.initial_obj = self.calculate_objective(diff_objective=False) - return self._get_obs(self.obs_keys, self.add_time_obs), copy.deepcopy(self.info) + obs = self._get_obs(self.obs_keys, self.add_time_obs, self.add_mean_obs) + + return obs, copy.deepcopy(self.info) def _sampling(self, step=None, test=False, sample_new=True, *args, **kwargs) -> None: @@ -248,7 +254,7 @@ def _sample_uniform(self, sample_keys=None, sample_new=True) -> None: """ assert sample_new, 'Currently only implemented for sample_new=True' if not sample_keys: - sample_keys = self.obs_keys + sample_keys = self.state_keys for unit_type, column, idxs in sample_keys: if 'res_' not in unit_type: self._sample_from_range(unit_type, column, idxs) @@ -277,7 +283,7 @@ def _sample_normal(self, relative_std=None, truncated=False, sample_new=True, **kwargs) -> None: """ Sample data around mean values from simbench data. """ assert sample_new, 'Currently only implemented for sample_new=True' - for unit_type, column, idxs in self.obs_keys: + for unit_type, column, idxs in self.state_keys: if 'res_' in unit_type or 'poly_cost' in unit_type: continue @@ -396,7 +402,7 @@ def step(self, action, *args, **kwargs) -> tuple: terminated = False truncated = False - obs = self._get_obs(self.obs_keys, self.add_time_obs) + obs = self._get_obs(self.obs_keys, self.add_time_obs, self.add_mean_obs) assert not np.isnan(obs).any() return obs, reward, terminated, truncated, copy.deepcopy(self.info) @@ -510,13 +516,14 @@ def calculate_reward(self) -> float: return reward - def _get_obs(self, obs_keys, add_time_obs) -> np.ndarray: + def _get_obs(self, obs_keys, add_time_obs=False, add_mean_obs=False + ) -> np.ndarray: obss = [(self.net[unit_type].loc[idxs, column].to_numpy()) if (unit_type != 'load' or not self.bus_wise_obs) else get_bus_aggregated_obs(self.net, 'load', column, idxs) for unit_type, column, idxs in obs_keys] - if self.add_mean_obs: + if add_mean_obs: mean_obs = [np.mean(partial_obs) for partial_obs in obss if len(partial_obs) > 1] obss.append(mean_obs) @@ -528,6 +535,12 @@ def _get_obs(self, obs_keys, add_time_obs) -> np.ndarray: return np.concatenate(obss) + def get_state(self) -> np.ndarray: + """ Steal the popgym API to provide the full state of the system. + Compare https://popgym.readthedocs.io/en/latest/autoapi/popgym/core/env/index.html + """ + return self._get_obs(self.state_keys) + def render(self, **kwargs): """ Render the current state of the power system. Uses the `simple_plot` pandapower method. Overwrite for more sophisticated rendering. For @@ -632,12 +645,12 @@ def default_optimal_power_flow(net, calculate_voltage_angles=False, **kwargs): pp.runopp(net, calculate_voltage_angles=calculate_voltage_angles, **kwargs) - -def get_obs_space(net, obs_keys: list, add_time_obs: bool, - add_mean_obs: bool=False, - seed: int=None, last_n_obs: int=1, bus_wise_obs=False - ) -> gym.spaces.Box: - """ Get observation space from the constraints of the power network. """ +def get_obs_and_state_space(net: pp.pandapowerNet, obs_or_state_keys: list, + add_time_obs: bool=False, add_mean_obs: bool=False, + seed: int=None, last_n_obs: int=1, + bus_wise_obs=False) -> gym.spaces.Box: + """ Get observation or state space from the constraints of the power + network. """ lows, highs = [], [] if add_time_obs: @@ -646,7 +659,7 @@ def get_obs_space(net, obs_keys: list, add_time_obs: bool, lows.append(-np.ones(6)) highs.append(np.ones(6)) - for unit_type, column, idxs in obs_keys: + for unit_type, column, idxs in obs_or_state_keys: if 'res_' in unit_type: # The constraints are never defined in the results table unit_type = unit_type[4:] diff --git a/tests/test_opf_env.py b/tests/test_opf_env.py index 5a93549..37726b7 100644 --- a/tests/test_opf_env.py +++ b/tests/test_opf_env.py @@ -18,16 +18,21 @@ def test_base_class_API(): net.load.loc[:, 'max_max_p_mw'] = 3 # Define action space - act_keys = [('load', 'q_mvar', net.load.index)] - net.load.loc[:, 'min_q_mvar'] = -1 - net.load.loc[:, 'max_q_mvar'] = 1 + act_keys = [('sgen', 'p_mw', net.sgen.index)] + print(act_keys) + net.sgen.loc[:, 'min_p_mw'] = 0 + net.sgen.loc[:, 'max_p_mw'] = net.sgen.loc[:, 'p_mw'] - env = opf_env.OpfEnv(net, act_keys, obs_keys, + env = opf_env.OpfEnv(net, act_keys, obs_keys, test_data='full_uniform', train_data='full_uniform', seed=42) - env.reset() - env.step(env.action_space.sample()) + # Test API + obs, info = env.reset(options={'seed': 1}) + env.step(np.array([0.5, 0.5])) + assert env.get_current_actions().shape == env.action_space.shape + assert env.get_state().shape == env.state_space.shape + # TODO: Add rest of the API (problem: pf not converging currently) def test_obs_space_def(): dummy_env.reset() @@ -43,11 +48,11 @@ def test_obs_space_def(): ('res_ext_grid', 'q_mvar', np.array([0])), ) - obs_space = opf_env.get_obs_space( + obs_space = opf_env.get_obs_and_state_space( dummy_env.net, obs_keys, add_time_obs=False, seed=42) assert len(obs_space.low) == 9 - obs_space = opf_env.get_obs_space( + obs_space = opf_env.get_obs_and_state_space( dummy_env.net, obs_keys, add_time_obs=True, seed=42) assert len(obs_space.high) == 15