JuliaReinforcementLearning · findmyway · Jul 8, 2021 · Jul 7, 2021 · Jul 7, 2021 · Jul 7, 2021
diff --git a/src/ReinforcementLearningBase/src/interface.jl b/src/ReinforcementLearningBase/src/interface.jl
@@ -404,6 +404,23 @@ Make an independent copy of `env`,
 @api copy(env::AbstractEnv) = deepcopy(env)
 @api copyto!(dest::AbstractEnv, src::AbstractEnv)
 
+# TODO(review): is checking the state of all players in env enough?
+"""
+    Base.:(==)(env1::T, env2::T) where T<:AbstractEnv
+
+Return `true` when `state(env1, player) == state(env2, player)` holds for every `player`
+in `players(env1)`.
+
+!!! warning
+    Only check the state of all players in the env.
+"""
+Base.:(==)(env1::T, env2::T) where {T<:AbstractEnv} =
+    all(state(env1, player) == state(env2, player) for player in players(env1))
+
+# Hash exactly the per-player states that `==` compares.
+Base.hash(env::AbstractEnv, h::UInt) =
+    hash([state(env, player) for player in players(env)], h)
+
 @api nameof(env::AbstractEnv) = nameof(typeof(env))
 
 """

diff --git a/src/ReinforcementLearningZoo/test/cfr/nash_conv.jl b/src/ReinforcementLearningZoo/test/cfr/nash_conv.jl
@@ -18,4 +18,13 @@
     env = OpenSpielEnv("kuhn_poker(players=4)")
     p = TabularRandomPolicy()
     @test RLZoo.nash_conv(p, env) ≈ 3.4760416666666663
+
+    # Julia-native Kuhn poker; compare floats approximately, like the OpenSpiel check above.
+    env = KuhnPokerEnv()
+    p = TabularRandomPolicy()
+    @test RLZoo.nash_conv(p, env) ≈ 11 / 12
+
+    # `≈` against zero needs an explicit absolute tolerance.
+    p = get_optimal_kuhn_policy(env)
+    @test RLZoo.nash_conv(p, env) ≈ 0.0 atol=1e-10
 end
diff --git a/src/ReinforcementLearningZoo/test/runtests.jl b/src/ReinforcementLearningZoo/test/runtests.jl
@@ -9,6 +9,7 @@ using Random
 using StableRNGs
 using OpenSpiel
 
+# Used with the OpenSpiel version of the game, i.e. OpenSpielEnv("kuhn_poker").
 function get_optimal_kuhn_policy(α = 0.2)
     TabularRandomPolicy(
         table = Dict(
@@ -28,6 +29,33 @@ function get_optimal_kuhn_policy(α = 0.2)
     )
 end
 
+"""
+    get_optimal_kuhn_policy(env::KuhnPokerEnv; α = 0.2)
+
+Build the `TabularRandomPolicy` used for the Julia version of Kuhn poker
+(`KuhnPokerEnv`). `env` only selects this method; its contents are never read.
+
+Each table key is a tuple of symbols (presumably the player's card followed by the
+actions taken so far) and each value is a two-element vector parameterized by `α`.
+"""
+function get_optimal_kuhn_policy(env::KuhnPokerEnv; α = 0.2)
+    probabilities = Dict(
+        (:J,) => [1 - α, α],
+        (:J, :pass, :bet) => [1.0, 0.0],
+        (:Q,) => [1.0, 0.0],
+        (:Q, :pass, :bet) => [2.0 / 3.0 - α, 1.0 / 3.0 + α],
+        (:K,) => [1 - 3α, 3α],
+        (:K, :pass, :bet) => [0.0, 1.0],
+        (:J, :pass) => [2.0 / 3.0, 1.0 / 3.0],
+        (:J, :bet) => [1.0, 0.0],
+        (:Q, :pass) => [1.0, 0.0],
+        (:Q, :bet) => [2.0 / 3.0, 1.0 / 3.0],
+        (:K, :pass) => [0.0, 1.0],
+        (:K, :bet) => [0.0, 1.0],
+    )
+    return TabularRandomPolicy(table = probabilities)
+end
+
 @testset "ReinforcementLearningZoo.jl" begin
     include("cfr/cfr.jl")
 end