Merge pull request #68 from dityas/dev

Dev
dityas · Mar 12, 2020 · 37a6a04 · 37a6a04
2 parents 2c99811 + 81c5195
commit 37a6a04
Show file tree

Hide file tree

Showing 47 changed files with 3,499 additions and 738 deletions.
diff --git a/Protos/build/Protos.jar b/Protos/build/Protos.jar
diff --git a/Protos/domains/tiger.95.SPUDD.noisy.txt b/Protos/domains/tiger.95.SPUDD.noisy.txt
@@ -0,0 +1,47 @@
+
+(variables 
+ (tiger-location tiger-left tiger-right))
+
+(observations 
+ (growl growl-left growl-right))
+
+init (tiger-location (tiger-left (0.5)) (tiger-right (0.5)))
+
+action listen
+    tiger-location (SAMEtiger-location)
+    observe
+        growl (tiger-location' (tiger-left  (growl' (growl-left  (0.75))
+						    (growl-right (0.25))))
+			       (tiger-right (growl' (growl-left  (0.25))
+						    (growl-right (0.75)))))
+    endobserve
+    cost (1)
+endaction
+
+action open-left
+    tiger-location (tiger-location' (tiger-left (0.5)) (tiger-right (0.5)))
+    observe
+        growl (growl' (growl-left  (0.5))
+		      (growl-right (0.5)))
+    endobserve
+    cost (tiger-location (tiger-left  (100))
+			 (tiger-right (-10)))
+endaction
+
+action open-right
+    tiger-location (tiger-location' (tiger-left (0.5)) (tiger-right (0.5)))
+    observe
+        growl (growl' (growl-left  (0.5))
+		      (growl-right (0.5)))
+    endobserve
+    cost (tiger-location (tiger-left  (-10))
+			 (tiger-right (100)))
+endaction
+
+// discount factor
+discount  0.95
+
+// tolerance (presumably the solver's convergence threshold -- TODO confirm)
+tolerance 0.001
+
+
diff --git a/Protos/domains/tiger.L1.enemy.txt b/Protos/domains/tiger.L1.enemy.txt
@@ -7,6 +7,8 @@
  	(creak creak-left creak-right silence)
 )
 
+most_probable_ai listen 0.99
+
 init (tiger-location (tiger-left (0.5)) (tiger-right (0.5)))
 
 unnormalized
@@ -271,17 +273,14 @@ discount  0.9
 tolerance 0.001
 
 (frames
-	(frame 0 level 0
+	(frame 0 level 0 def
 		(variables 
 		 	(tiger-location tiger-left tiger-right))
 
 		(observations 
 		 	(growl growl-left growl-right))
 
 		init (tiger-location (tiger-left (0.5)) (tiger-right (0.5)))
-		adjunct a1 (tiger-location (tiger-left (0.75)) (tiger-right (0.25)))
-		adjunct a2 (tiger-location (tiger-left (0.25)) (tiger-right (0.75)))
-//		adjunct a3 (tiger-location (tiger-left (0.99)) (tiger-right (0.01)))
 
 		action listen
 		    tiger-location (SAMEtiger-location)
@@ -315,7 +314,7 @@ tolerance 0.001
 		endaction
 
 		// discount factor
-		discount  0.95
+		discount  0.9
 
 		// tolerance ??
 		tolerance 0.001

diff --git a/Protos/domains/tiger.L1.txt b/Protos/domains/tiger.L1.txt
@@ -8,6 +8,8 @@
 )
 
 
+most_probable_ai listen 0.99
+
 unnormalized
 
 init (tiger-location (tiger-left (0.5)) (tiger-right (0.5)))

diff --git a/Protos/domains/tiger.L1multiple_new_parser.txt b/Protos/domains/tiger.L1multiple_new_parser.txt
@@ -211,10 +211,10 @@ tolerance 0.001
 		action listen
 		    tiger-location (SAMEtiger-location)
 		    observe
-		        growl (tiger-location' (tiger-left  (growl' (growl-left  (0.8))
-								    (growl-right (0.2))))
-					       (tiger-right (growl' (growl-left  (0.2))
-								    (growl-right (0.8)))))
+		        growl (tiger-location' (tiger-left  (growl' (growl-left  (0.85))
+								    (growl-right (0.15))))
+					       (tiger-right (growl' (growl-left  (0.15))
+								    (growl-right (0.85)))))
 		    endobserve
 		    cost (1)
 		endaction

diff --git a/Protos/src/thinclab/belief/SSGABeliefExpansion.java b/Protos/src/thinclab/belief/SSGABeliefExpansion.java
@@ -8,6 +8,7 @@
 package thinclab.belief;
 
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.HashSet;
 import java.util.List;
 
@@ -49,7 +50,7 @@ public class SSGABeliefExpansion extends BeliefRegionExpansionStrategy {
 	/* number of iterations of SSGA during each expansion */
 	private int nIterations;
 
-	private static final Logger logger = Logger.getLogger(SSGABeliefExpansion.class);
+	private static final Logger LOGGER = Logger.getLogger(SSGABeliefExpansion.class);
 
 	// ----------------------------------------------------------------------------------------
 
@@ -70,7 +71,7 @@ public SSGABeliefExpansion(POMDP p, int maxDepth, int iterations) {
 		 * Run a full belief expansion for 3 time steps to facilitate proper exploration
 		 */
 
-		FullBeliefExpansion fb = new FullBeliefExpansion(this.p, 3);
+		FullBeliefExpansion fb = new FullBeliefExpansion(this.p, 1);
 		fb.expand();
 
 		this.initialBeliefs = new ArrayList<DD>();
@@ -81,7 +82,7 @@ public SSGABeliefExpansion(POMDP p, int maxDepth, int iterations) {
 
 		fb = null;
 
-		logger.debug("SSGA expansion search initialized");
+		LOGGER.debug("SSGA expansion search initialized");
 	}
 
 	public SSGABeliefExpansion(IPOMDP ip, int iterations) {
@@ -119,7 +120,7 @@ public void expand() {
 		 * Run SSGA expansion for nIterations
 		 */
 
-		logger.debug("Starting " + this.nIterations 
+		LOGGER.debug("Starting " + this.nIterations 
 				+ " expansions till depth " + this.getHBound() 
 				+ " from " + this.initialBeliefs.size() + " belief points.");
 
@@ -152,7 +153,7 @@ public void expand() {
 					else act = Global.random.nextInt(this.f.getActions().size());
 
 					DD obsDist = this.f.getObsDist(belief, this.f.getActions().get(act));
-
+					
 					int[][] obsConfig = null;
 
 					if (this.f.getType().contentEquals("POMDP"))
@@ -209,7 +210,7 @@ public void expand() {
 
 		} /* for iterations */
 
-		logger.debug("Total beliefs explored are " + this.exploredBeliefs.size());
+		LOGGER.debug("Total beliefs explored are " + this.exploredBeliefs.size());
 	}
 
 	@Override

diff --git a/Protos/src/thinclab/ddinterface/DDMaker.java b/Protos/src/thinclab/ddinterface/DDMaker.java
@@ -182,6 +182,7 @@ public DDTree getDDTreeFromSequence(String[] varSequence, String[][] values) {
 				}
 
 				try {
+//					logger.debug("Record " + record + " is " + Arrays.toString(currentRecord));
 					currentNode = currentNode.atChild(currentRecord[c]);
 				}
 				catch (Exception e) {

diff --git a/Protos/src/thinclab/decisionprocesses/DecisionProcess.java b/Protos/src/thinclab/decisionprocesses/DecisionProcess.java
@@ -14,6 +14,8 @@
 import java.util.List;
 import java.util.stream.Collectors;
 
+import org.apache.commons.lang3.ArrayUtils;
+
 import thinclab.belief.BeliefOperations;
 import thinclab.exceptions.VariableNotFoundException;
 import thinclab.exceptions.ZeroProbabilityObsException;
@@ -46,13 +48,20 @@ public abstract class DecisionProcess implements Serializable {
 	public abstract List<DD> getInitialBeliefs();
 	public abstract DD getCurrentBelief();
 	public abstract int[] getStateVarIndices();
+	public abstract int[] getStateVarPrimeIndices();
 	public abstract int[] getObsVarIndices();
+	public abstract int[] getObsVarPrimeIndices();
 	public abstract void setGlobals();
 	public abstract String getType();
 	public abstract String getBeliefString(DD belief);
 	public abstract DD getRewardFunctionForAction(String action);
+	public abstract DD[] getTiForAction(String action);
+	public abstract DD[] getOiForAction(String action);
 	public abstract void step(DD belief, String action, String[] obs) throws Exception;
 	public abstract void setTi(String action, DD[] Ti);
+	public abstract int getNumVars();
+	public abstract double evaluatePolicy(
+			DD[] alphaVectors, int[] policy, int trials, int evalDepth, boolean verbose);
 
 	// ---------------------------------------------------------------------------------
 
@@ -70,6 +79,23 @@ public static String getActionFromPolicy(
 		return bestAction;
 	}
 
+	/**
+	 * Maps a configuration of variable and value indices to the corresponding
+	 * value names.
+	 *
+	 * Row 0 of config holds variable indices and row 1 holds value indices. Both
+	 * are decremented by one before being used to look up Global.valNames.
+	 *
+	 * Mostly used to turn obsConfigs into arrays of observation value names.
+	 *
+	 * @param config two row array: config[0] holds the variable indices and
+	 *               config[1] the value indices for those variables
+	 * @return one value name for every column of config
+	 */
+	public static String[] configToStrings(int[][] config) {
+
+		final int[] varIds = config[0];
+		final String[] names = new String[varIds.length];
+
+		int col = 0;
+
+		while (col < names.length) {
+
+			/* row of value names belonging to the variable in this column */
+			final String[] varValNames = Global.valNames[varIds[col] - 1];
+
+			names[col] = varValNames[config[1][col] - 1];
+			col++;
+		}
+
+		return names;
+	}
+
 	// --------------------------------------------------------------------------------
 
 	public static int getVarIndex(String varName) throws VariableNotFoundException {
@@ -113,10 +139,21 @@ public static int getBestAlphaIndex(DecisionProcess DP, DD belief, DD[] alphaVec
 		double val;
 		int bestAlphaId = 0;
 
+		int[] varIndices = null;
+
+		if (DP.getType().contentEquals("IPOMDP")) 
+			varIndices = 
+				ArrayUtils.subarray(
+						DP.getStateVarIndices(), 
+						0, ((IPOMDP) DP).thetaVarPosition);
+
+		else
+			varIndices = DP.getStateVarIndices();
+
 		double[] values = new double[alphaVectors.length];
 		for (int alphaId = 0; alphaId < alphaVectors.length; alphaId++) {
 
-			val = OP.dotProduct(belief, alphaVectors[alphaId], DP.getStateVarIndices());
+			val = OP.dotProduct(belief, alphaVectors[alphaId], varIndices);
 			values[alphaId] = val;
 
 			if (val >= bestVal) {
-Original file line number
+Diff line change
@@ Expand Up / @@ -8,6 +8,8 @@ @@
     )
+    most_probable_ai listen 0.99
     unnormalized
     init (tiger-location (tiger-left (0.5)) (tiger-right (0.5)))
@@ Expand Down @@