Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add Base.:(==) and Base.hash for AbstractEnv and test nash_conv on KuhnPokerEnv #348

Merged
merged 9 commits into from
Jul 8, 2021
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,7 @@ include("CartPoleEnv.jl")
include("MountainCarEnv.jl")
include("PendulumEnv.jl")
include("BitFlippingEnv.jl")

# Define equality and hashing for environments in terms of their state only,
# so that environments can be used as `Dict`/`Set` keys (e.g. in tabular
# policies) and compared in tests.
#
# NOTE(review): two environments of *different* concrete types whose `state`s
# compare equal will be `==` here, and any fields not reflected in
# `state(env)` (rng, step counters, reward accumulators, ...) are ignored by
# both `==` and `hash`. Assumes `state(env)` is itself hashable — TODO confirm
# this holds for every AbstractEnv subtype.
# checking the state of env is enough?
Base.:(==)(env1::AbstractEnv, env2::AbstractEnv) = state(env1) == state(env2)
Base.hash(env::AbstractEnv, h::UInt) = hash(state(env), h)
9 changes: 8 additions & 1 deletion src/ReinforcementLearningZoo/test/cfr/nash_conv.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

@test RLZoo.nash_conv(p, env) == 11 / 12

p = get_optimal_kuhn_policy()
p = get_optimal_kuhn_policy(env)
@test RLZoo.nash_conv(p, env) == 0.0

env = OpenSpielEnv("leduc_poker")
Expand All @@ -18,4 +18,11 @@
env = OpenSpielEnv("kuhn_poker(players=4)")
p = TabularRandomPolicy()
@test RLZoo.nash_conv(p, env) ≈ 3.4760416666666663

env = KuhnPokerEnv()
p = TabularRandomPolicy()
@test RLZoo.nash_conv(p, env) == 11 / 12

p = get_optimal_kuhn_policy(env)
@test RLZoo.nash_conv(p, env) == 0.0
end
54 changes: 37 additions & 17 deletions src/ReinforcementLearningZoo/test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,23 +9,43 @@ using Random
using StableRNGs
using OpenSpiel

function get_optimal_kuhn_policy(α = 0.2)
TabularRandomPolicy(
table = Dict(
"0" => [1 - α, α],
"0pb" => [1.0, 0.0],
"1" => [1.0, 0.0],
"1pb" => [2.0 / 3.0 - α, 1.0 / 3.0 + α],
"2" => [1 - 3 * α, 3 * α],
"2pb" => [0.0, 1.0],
"0p" => [2.0 / 3.0, 1.0 / 3.0],
"0b" => [1.0, 0.0],
"1p" => [1.0, 0.0],
"1b" => [2.0 / 3.0, 1.0 / 3.0],
"2p" => [0.0, 1.0],
"2b" => [0.0, 1.0],
),
)
"""
    get_optimal_kuhn_policy(env, α = 0.2)

Return a `TabularRandomPolicy` encoding the analytically optimal (Nash
equilibrium) strategy for 2-player Kuhn poker, parameterized by the bluffing
probability `α`. Any `α ∈ [0, 1/3]` yields an equilibrium, so
`nash_conv(policy, env)` should be `0.0` for the returned policy.

The state keys differ by environment backend, so the table layout is selected
by **multiple dispatch** on the environment type (instead of branching on
`typeof(env)`): the native `KuhnPokerEnv` identifies information states by
tuples of symbols, while the generic fallback (used for `OpenSpielEnv`)
identifies them by strings.
"""
function get_optimal_kuhn_policy(::KuhnPokerEnv, α = 0.2)
    # Native environment: states are tuples like (:J,), (:J, :pass, :bet), ...
    TabularRandomPolicy(
        table = Dict(
            (:J,) => [1 - α, α],
            (:J, :pass, :bet) => [1.0, 0.0],
            (:Q,) => [1.0, 0.0],
            (:Q, :pass, :bet) => [2.0 / 3.0 - α, 1.0 / 3.0 + α],
            (:K,) => [1 - 3 * α, 3 * α],
            (:K, :pass, :bet) => [0.0, 1.0],
            (:J, :pass) => [2.0 / 3.0, 1.0 / 3.0],
            (:J, :bet) => [1.0, 0.0],
            (:Q, :pass) => [1.0, 0.0],
            (:Q, :bet) => [2.0 / 3.0, 1.0 / 3.0],
            (:K, :pass) => [0.0, 1.0],
            (:K, :bet) => [0.0, 1.0],
        ),
    )
end

# Fallback for string-keyed backends (e.g. OpenSpiel's "kuhn_poker"):
# cards are "0"/"1"/"2" and action histories are "p"/"b" suffixes.
function get_optimal_kuhn_policy(env, α = 0.2)
    TabularRandomPolicy(
        table = Dict(
            "0" => [1 - α, α],
            "0pb" => [1.0, 0.0],
            "1" => [1.0, 0.0],
            "1pb" => [2.0 / 3.0 - α, 1.0 / 3.0 + α],
            "2" => [1 - 3 * α, 3 * α],
            "2pb" => [0.0, 1.0],
            "0p" => [2.0 / 3.0, 1.0 / 3.0],
            "0b" => [1.0, 0.0],
            "1p" => [1.0, 0.0],
            "1b" => [2.0 / 3.0, 1.0 / 3.0],
            "2p" => [0.0, 1.0],
            "2b" => [0.0, 1.0],
        ),
    )
end

@testset "ReinforcementLearningZoo.jl" begin
Expand Down