Skip to content

Commit

Permalink
Explicit engine computes optimal strategy for Rmax=? [ C ].
Browse files Browse the repository at this point in the history
  • Loading branch information
davexparker committed Feb 5, 2025
1 parent 40fa65c commit 299a7c5
Showing 1 changed file with 22 additions and 5 deletions.
27 changes: 22 additions & 5 deletions prism/src/explicit/MDPModelChecker.java
Original file line number Diff line number Diff line change
Expand Up @@ -1927,6 +1927,7 @@ public ModelCheckerResult computeTotalRewardsMax(MDP<Double> mdp, MDPRewards<Dou
int n;
long timer;
BitSet inf;
int strat[] = null;

// Local copy of setting
MDPSolnMethod mdpSolnMethod = this.mdpSolnMethod;
Expand All @@ -1947,8 +1948,17 @@ public ModelCheckerResult computeTotalRewardsMax(MDP<Double> mdp, MDPRewards<Dou
// Store num states
n = mdp.getNumStates();

long timerPre;
// If required, create/initialise strategy storage
// Set choices to -1, denoting unknown
if (genStrat) {
strat = new int[n];
for (int i = 0; i < n; i++) {
strat[i] = -1;
}
}

// Precomputation (not optional)
long timerPre;
if (noPositiveECs) {
// no inf states
inf = new BitSet();
Expand Down Expand Up @@ -1986,29 +1996,36 @@ public ModelCheckerResult computeTotalRewardsMax(MDP<Double> mdp, MDPRewards<Dou

// inf = Pmax[ <> positiveECs ] > 0
// = ! (Pmax[ <> positiveECs ] = 0)
inf = prob0(mdp, null, positiveECs, false, null); // Pmax[ <> positiveECs ] = 0
inf = prob0(mdp, null, positiveECs, false, strat); // Pmax[ <> positiveECs ] = 0
inf.flip(0,n); // !(Pmax[ <> positive ECs ] = 0) = Pmax[ <> positiveECs ] > 0

timerPre = System.currentTimeMillis() - timerPre;
mainLog.println("Precomputation took " + timerPre / 1000.0 + " seconds, " + inf.cardinality() + " infinite states, " + (n - inf.cardinality()) + " states remaining.");
}

// TODO: generate strategy for "inf" state (possibility to reach +ve EC)

// Compute rewards
// do standard max reward calculation, but with empty target set
switch (mdpSolnMethod) {
case VALUE_ITERATION:
res = computeReachRewardsValIter(mdp, mdpRewards, new BitSet(), inf, false, null, null, null);
res = computeReachRewardsValIter(mdp, mdpRewards, new BitSet(), inf, false, null, null, strat);
break;
case GAUSS_SEIDEL:
res = computeReachRewardsGaussSeidel(mdp, mdpRewards, new BitSet(), inf, false, null, null, null);
res = computeReachRewardsGaussSeidel(mdp, mdpRewards, new BitSet(), inf, false, null, null, strat);
break;
case POLICY_ITERATION:
res = computeReachRewardsPolIter(mdp, mdpRewards, new BitSet(), inf, false, null);
res = computeReachRewardsPolIter(mdp, mdpRewards, new BitSet(), inf, false, strat);
break;
default:
throw new PrismException("Unknown MDP solution method " + mdpSolnMethod.fullName());
}

// Store strategy
if (genStrat) {
res.strat = new MDStrategyArray<Double>(mdp, strat);
}

// Finished expected total reward
timer = System.currentTimeMillis() - timer;
mainLog.println("Expected total reward took " + timer / 1000.0 + " seconds.");
Expand Down

0 comments on commit 299a7c5

Please sign in to comment.