Explicit engine computes optimal strategy for Rmax=? [ C ].

prismmodelchecker · Feb 5, 2025 · 299a7c5 · 299a7c5
1 parent 40fa65c
commit 299a7c5
Showing 1 changed file with 22 additions and 5 deletions.
diff --git a/prism/src/explicit/MDPModelChecker.java b/prism/src/explicit/MDPModelChecker.java
@@ -1927,6 +1927,7 @@ public ModelCheckerResult computeTotalRewardsMax(MDP<Double> mdp, MDPRewards<Dou
 		int n;
 		long timer;
 		BitSet inf;
+		int strat[] = null;
 
 		// Local copy of setting
 		MDPSolnMethod mdpSolnMethod = this.mdpSolnMethod;
@@ -1947,8 +1948,17 @@ public ModelCheckerResult computeTotalRewardsMax(MDP<Double> mdp, MDPRewards<Dou
 		// Store num states
 		n = mdp.getNumStates();
 
-		long timerPre;
+		// If required, create/initialise strategy storage
+		// Set choices to -1, denoting unknown
+		if (genStrat) {
+			strat = new int[n];
+			for (int i = 0; i < n; i++) {
+				strat[i] = -1;
+			}
+		}
 
+		// Precomputation (not optional)
+		long timerPre;
 		if (noPositiveECs) {
 			// no inf states
 			inf = new BitSet();
@@ -1986,29 +1996,36 @@ public ModelCheckerResult computeTotalRewardsMax(MDP<Double> mdp, MDPRewards<Dou
 
 			// inf = Pmax[ <> positiveECs ] > 0
 			//     = ! (Pmax[ <> positiveECs ] = 0)
-			inf = prob0(mdp, null, positiveECs, false, null);  // Pmax[ <> positiveECs ] = 0
+			inf = prob0(mdp, null, positiveECs, false, strat);  // Pmax[ <> positiveECs ] = 0
 			inf.flip(0,n);  // !(Pmax[ <> positive ECs ] = 0) = Pmax[ <> positiveECs ] > 0
 
 			timerPre = System.currentTimeMillis() - timerPre;
 			mainLog.println("Precomputation took " + timerPre / 1000.0 + " seconds, " + inf.cardinality() + " infinite states, " + (n - inf.cardinality()) + " states remaining.");
 		}
 
+		// TODO: generate strategy for "inf" state (possibility to reach +ve EC)
+
 		// Compute rewards
 		// do standard max reward calculation, but with empty target set
 		switch (mdpSolnMethod) {
 		case VALUE_ITERATION:
-			res = computeReachRewardsValIter(mdp, mdpRewards, new BitSet(), inf, false, null, null, null);
+			res = computeReachRewardsValIter(mdp, mdpRewards, new BitSet(), inf, false, null, null, strat);
 			break;
 		case GAUSS_SEIDEL:
-			res = computeReachRewardsGaussSeidel(mdp, mdpRewards, new BitSet(), inf, false, null, null, null);
+			res = computeReachRewardsGaussSeidel(mdp, mdpRewards, new BitSet(), inf, false, null, null, strat);
 			break;
 		case POLICY_ITERATION:
-			res = computeReachRewardsPolIter(mdp, mdpRewards, new BitSet(), inf, false, null);
+			res = computeReachRewardsPolIter(mdp, mdpRewards, new BitSet(), inf, false, strat);
 			break;
 		default:
 			throw new PrismException("Unknown MDP solution method " + mdpSolnMethod.fullName());
 		}
 
+		// Store strategy
+		if (genStrat) {
+			res.strat = new MDStrategyArray<Double>(mdp, strat);
+		}
+
 		// Finished expected total reward
 		timer = System.currentTimeMillis() - timer;
 		mainLog.println("Expected total reward took " + timer / 1000.0 + " seconds.");