Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add Base.:(==) and Base.hash for AbstractEnv and test nash_conv on KuhnPokerEnv #348

Merged
merged 9 commits into from
Jul 8, 2021
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,7 @@ include("CartPoleEnv.jl")
include("MountainCarEnv.jl")
include("PendulumEnv.jl")
include("BitFlippingEnv.jl")

# Define equality and hashing for environments in terms of their state only,
# so that environments can be used as `Dict`/`Set` keys (e.g. in tabular
# policies) and compared in tests.
#
# NOTE(review): two environments of *different* concrete types whose `state`s
# compare equal will be `==` here, and any fields not reflected in
# `state(env)` (rng, step counters, reward accumulators, ...) are ignored by
# both `==` and `hash`. Assumes `state(env)` is itself hashable — TODO confirm
# this holds for every AbstractEnv subtype.
# checking the state of env is enough?
Base.:(==)(env1::AbstractEnv, env2::AbstractEnv) = state(env1) == state(env2)
Base.hash(env::AbstractEnv, h::UInt) = hash(state(env), h)
9 changes: 8 additions & 1 deletion src/ReinforcementLearningZoo/test/cfr/nash_conv.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

@test RLZoo.nash_conv(p, env) == 11 / 12

p = get_optimal_kuhn_policy()
p = get_optimal_kuhn_policy(env)
@test RLZoo.nash_conv(p, env) == 0.0

env = OpenSpielEnv("leduc_poker")
Expand All @@ -18,4 +18,11 @@
env = OpenSpielEnv("kuhn_poker(players=4)")
p = TabularRandomPolicy()
@test RLZoo.nash_conv(p, env) ≈ 3.4760416666666663

env = KuhnPokerEnv()
p = TabularRandomPolicy()
@test RLZoo.nash_conv(p, env) == 11 / 12

p = get_optimal_kuhn_policy(env)
@test RLZoo.nash_conv(p, env) == 0.0
end
54 changes: 37 additions & 17 deletions src/ReinforcementLearningZoo/test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,23 +9,43 @@ using Random
using StableRNGs
using OpenSpiel

function get_optimal_kuhn_policy(α = 0.2)
TabularRandomPolicy(
table = Dict(
"0" => [1 - α, α],
"0pb" => [1.0, 0.0],
"1" => [1.0, 0.0],
"1pb" => [2.0 / 3.0 - α, 1.0 / 3.0 + α],
"2" => [1 - 3 * α, 3 * α],
"2pb" => [0.0, 1.0],
"0p" => [2.0 / 3.0, 1.0 / 3.0],
"0b" => [1.0, 0.0],
"1p" => [1.0, 0.0],
"1b" => [2.0 / 3.0, 1.0 / 3.0],
"2p" => [0.0, 1.0],
"2b" => [0.0, 1.0],
),
)
"""
    get_optimal_kuhn_policy(env, α = 0.2)

Return a `TabularRandomPolicy` encoding the analytically optimal (Nash
equilibrium) strategy for 2-player Kuhn poker, parameterized by the bluffing
probability `α`. Any `α ∈ [0, 1/3]` yields an equilibrium, so
`nash_conv(policy, env)` should be `0.0` for the returned policy.

The state keys differ by environment backend, so the table layout is selected
by **multiple dispatch** on the environment type (instead of branching on
`typeof(env)`): the native `KuhnPokerEnv` identifies information states by
tuples of symbols, while the generic fallback (used for `OpenSpielEnv`)
identifies them by strings.
"""
function get_optimal_kuhn_policy(::KuhnPokerEnv, α = 0.2)
    # Native environment: states are tuples like (:J,), (:J, :pass, :bet), ...
    TabularRandomPolicy(
        table = Dict(
            (:J,) => [1 - α, α],
            (:J, :pass, :bet) => [1.0, 0.0],
            (:Q,) => [1.0, 0.0],
            (:Q, :pass, :bet) => [2.0 / 3.0 - α, 1.0 / 3.0 + α],
            (:K,) => [1 - 3 * α, 3 * α],
            (:K, :pass, :bet) => [0.0, 1.0],
            (:J, :pass) => [2.0 / 3.0, 1.0 / 3.0],
            (:J, :bet) => [1.0, 0.0],
            (:Q, :pass) => [1.0, 0.0],
            (:Q, :bet) => [2.0 / 3.0, 1.0 / 3.0],
            (:K, :pass) => [0.0, 1.0],
            (:K, :bet) => [0.0, 1.0],
        ),
    )
end

# Fallback for string-keyed backends (e.g. OpenSpiel's "kuhn_poker"):
# cards are "0"/"1"/"2" and action histories are "p"/"b" suffixes.
function get_optimal_kuhn_policy(env, α = 0.2)
    TabularRandomPolicy(
        table = Dict(
            "0" => [1 - α, α],
            "0pb" => [1.0, 0.0],
            "1" => [1.0, 0.0],
            "1pb" => [2.0 / 3.0 - α, 1.0 / 3.0 + α],
            "2" => [1 - 3 * α, 3 * α],
            "2pb" => [0.0, 1.0],
            "0p" => [2.0 / 3.0, 1.0 / 3.0],
            "0b" => [1.0, 0.0],
            "1p" => [1.0, 0.0],
            "1b" => [2.0 / 3.0, 1.0 / 3.0],
            "2p" => [0.0, 1.0],
            "2b" => [0.0, 1.0],
        ),
    )
end

@testset "ReinforcementLearningZoo.jl" begin
Expand Down