Skip to content

Commit

Permalink
Merge pull request #68 from dityas/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
dityas authored Mar 12, 2020
2 parents 2c99811 + 81c5195 commit 37a6a04
Show file tree
Hide file tree
Showing 47 changed files with 3,499 additions and 738 deletions.
Binary file modified Protos/build/Protos.jar
Binary file not shown.
47 changes: 47 additions & 0 deletions Protos/domains/tiger.95.SPUDD.noisy.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@

// Tiger domain: one state variable, tiger-location (tiger-left / tiger-right),
// and one observation variable, growl (growl-left / growl-right).
// init assigns probability 0.5 to each tiger location.

(variables
(tiger-location tiger-left tiger-right))

(observations
(growl growl-left growl-right))

init (tiger-location (tiger-left (0.5)) (tiger-right (0.5)))

// listen: tiger-location is declared SAME (carried over to the next step).
// The growl observation matches the tiger's side with probability 0.75
// and reports the opposite side with probability 0.25. Cost is a flat 1.
action listen
tiger-location (SAMEtiger-location)
observe
growl (tiger-location' (tiger-left (growl' (growl-left (0.75))
(growl-right (0.25))))
(tiger-right (growl' (growl-left (0.25))
(growl-right (0.75)))))
endobserve
cost (1)
endaction

// open-left: tiger-location' is 0.5 / 0.5 with no dependence on the current
// tiger-location, and growl' is 0.5 / 0.5 with no dependence on the state.
// Cost is 100 when the tiger is on the left and -10 (a negative cost) when
// it is on the right.
action open-left
tiger-location (tiger-location' (tiger-left (0.5)) (tiger-right (0.5)))
observe
growl (growl' (growl-left (0.5))
(growl-right (0.5)))
endobserve
cost (tiger-location (tiger-left (100))
(tiger-right (-10)))
endaction

// open-right: mirror image of open-left. tiger-location' is 0.5 / 0.5 with no
// dependence on the current tiger-location, and growl' is 0.5 / 0.5.
// Cost is -10 (a negative cost) when the tiger is on the left and 100 when
// it is on the right.
action open-right
tiger-location (tiger-location' (tiger-left (0.5)) (tiger-right (0.5)))
observe
growl (growl' (growl-left (0.5))
(growl-right (0.5)))
endobserve
cost (tiger-location (tiger-left (-10))
(tiger-right (100)))
endaction

// discount factor
discount 0.95

// tolerance
// NOTE(review): presumably the solver's convergence threshold; confirm against the parser/solver
tolerance 0.001


9 changes: 4 additions & 5 deletions Protos/domains/tiger.L1.enemy.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
(creak creak-left creak-right silence)
)

most_probable_ai listen 0.99

init (tiger-location (tiger-left (0.5)) (tiger-right (0.5)))

unnormalized
Expand Down Expand Up @@ -271,17 +273,14 @@ discount 0.9
tolerance 0.001

(frames
(frame 0 level 0
(frame 0 level 0 def
(variables
(tiger-location tiger-left tiger-right))

(observations
(growl growl-left growl-right))

init (tiger-location (tiger-left (0.5)) (tiger-right (0.5)))
adjunct a1 (tiger-location (tiger-left (0.75)) (tiger-right (0.25)))
adjunct a2 (tiger-location (tiger-left (0.25)) (tiger-right (0.75)))
// adjunct a3 (tiger-location (tiger-left (0.99)) (tiger-right (0.01)))

action listen
tiger-location (SAMEtiger-location)
Expand Down Expand Up @@ -315,7 +314,7 @@ tolerance 0.001
endaction

// discount factor
discount 0.95
discount 0.9

// tolerance ??
tolerance 0.001
Expand Down
2 changes: 2 additions & 0 deletions Protos/domains/tiger.L1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
)


most_probable_ai listen 0.99

unnormalized

init (tiger-location (tiger-left (0.5)) (tiger-right (0.5)))
Expand Down
8 changes: 4 additions & 4 deletions Protos/domains/tiger.L1multiple_new_parser.txt
Original file line number Diff line number Diff line change
Expand Up @@ -211,10 +211,10 @@ tolerance 0.001
action listen
tiger-location (SAMEtiger-location)
observe
growl (tiger-location' (tiger-left (growl' (growl-left (0.8))
(growl-right (0.2))))
(tiger-right (growl' (growl-left (0.2))
(growl-right (0.8)))))
growl (tiger-location' (tiger-left (growl' (growl-left (0.85))
(growl-right (0.15))))
(tiger-right (growl' (growl-left (0.15))
(growl-right (0.85)))))
endobserve
cost (1)
endaction
Expand Down
13 changes: 7 additions & 6 deletions Protos/src/thinclab/belief/SSGABeliefExpansion.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
package thinclab.belief;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;

Expand Down Expand Up @@ -49,7 +50,7 @@ public class SSGABeliefExpansion extends BeliefRegionExpansionStrategy {
/* number of iterations of SSGA during each expansion */
private int nIterations;

private static final Logger logger = Logger.getLogger(SSGABeliefExpansion.class);
private static final Logger LOGGER = Logger.getLogger(SSGABeliefExpansion.class);

// ----------------------------------------------------------------------------------------

Expand All @@ -70,7 +71,7 @@ public SSGABeliefExpansion(POMDP p, int maxDepth, int iterations) {
* Run a full belief expansion for 3 time steps to facilitate proper exploration
*/

FullBeliefExpansion fb = new FullBeliefExpansion(this.p, 3);
FullBeliefExpansion fb = new FullBeliefExpansion(this.p, 1);
fb.expand();

this.initialBeliefs = new ArrayList<DD>();
Expand All @@ -81,7 +82,7 @@ public SSGABeliefExpansion(POMDP p, int maxDepth, int iterations) {

fb = null;

logger.debug("SSGA expansion search initialized");
LOGGER.debug("SSGA expansion search initialized");
}

public SSGABeliefExpansion(IPOMDP ip, int iterations) {
Expand Down Expand Up @@ -119,7 +120,7 @@ public void expand() {
* Run SSGA expansion for nIterations
*/

logger.debug("Starting " + this.nIterations
LOGGER.debug("Starting " + this.nIterations
+ " expansions till depth " + this.getHBound()
+ " from " + this.initialBeliefs.size() + " belief points.");

Expand Down Expand Up @@ -152,7 +153,7 @@ public void expand() {
else act = Global.random.nextInt(this.f.getActions().size());

DD obsDist = this.f.getObsDist(belief, this.f.getActions().get(act));

int[][] obsConfig = null;

if (this.f.getType().contentEquals("POMDP"))
Expand Down Expand Up @@ -209,7 +210,7 @@ public void expand() {

} /* for iterations */

logger.debug("Total beliefs explored are " + this.exploredBeliefs.size());
LOGGER.debug("Total beliefs explored are " + this.exploredBeliefs.size());
}

@Override
Expand Down
1 change: 1 addition & 0 deletions Protos/src/thinclab/ddinterface/DDMaker.java
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@ public DDTree getDDTreeFromSequence(String[] varSequence, String[][] values) {
}

try {
// logger.debug("Record " + record + " is " + Arrays.toString(currentRecord));
currentNode = currentNode.atChild(currentRecord[c]);
}
catch (Exception e) {
Expand Down
39 changes: 38 additions & 1 deletion Protos/src/thinclab/decisionprocesses/DecisionProcess.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
import java.util.List;
import java.util.stream.Collectors;

import org.apache.commons.lang3.ArrayUtils;

import thinclab.belief.BeliefOperations;
import thinclab.exceptions.VariableNotFoundException;
import thinclab.exceptions.ZeroProbabilityObsException;
Expand Down Expand Up @@ -46,13 +48,20 @@ public abstract class DecisionProcess implements Serializable {
/*
 * Abstract operations that concrete DecisionProcess subclasses must implement.
 *
 * NOTE(review): only the signatures are visible here; the notes below state what
 * call sites in this file show, everything else should be confirmed against the
 * implementing classes.
 */
public abstract List<DD> getInitialBeliefs();
public abstract DD getCurrentBelief();
/* getBestAlphaIndex passes these indices to OP.dotProduct as the variable set */
public abstract int[] getStateVarIndices();
public abstract int[] getStateVarPrimeIndices();
public abstract int[] getObsVarIndices();
public abstract int[] getObsVarPrimeIndices();
public abstract void setGlobals();
/* getBestAlphaIndex compares the returned string against "IPOMDP" */
public abstract String getType();
public abstract String getBeliefString(DD belief);
public abstract DD getRewardFunctionForAction(String action);
public abstract DD[] getTiForAction(String action);
public abstract DD[] getOiForAction(String action);
public abstract void step(DD belief, String action, String[] obs) throws Exception;
public abstract void setTi(String action, DD[] Ti);
public abstract int getNumVars();
/* NOTE(review): presumably runs `trials` evaluations up to `evalDepth` steps; confirm in subclasses */
public abstract double evaluatePolicy(
DD[] alphaVectors, int[] policy, int trials, int evalDepth, boolean verbose);

// ---------------------------------------------------------------------------------

Expand All @@ -70,6 +79,23 @@ public static String getActionFromPolicy(
return bestAction;
}

public static String[] configToStrings(int[][] config) {

    /*
     * Maps every column of a config array to the name of the value it selects.
     *
     * For column i, config[0][i] and config[1][i] are 1-based indices into the
     * first and second dimension of Global.valNames respectively.
     *
     * Mostly used to turn obsConfigs into arrays of observation value names.
     */

    final int[] varIds = config[0];
    final String[] names = new String[varIds.length];

    int col = 0;
    while (col < varIds.length) {
        /* both rows are 1-based, Global.valNames is 0-based */
        names[col] = Global.valNames[varIds[col] - 1][config[1][col] - 1];
        col++;
    }

    return names;
}

// --------------------------------------------------------------------------------

public static int getVarIndex(String varName) throws VariableNotFoundException {
Expand Down Expand Up @@ -113,10 +139,21 @@ public static int getBestAlphaIndex(DecisionProcess DP, DD belief, DD[] alphaVec
double val;
int bestAlphaId = 0;

int[] varIndices = null;

if (DP.getType().contentEquals("IPOMDP"))
varIndices =
ArrayUtils.subarray(
DP.getStateVarIndices(),
0, ((IPOMDP) DP).thetaVarPosition);

else
varIndices = DP.getStateVarIndices();

double[] values = new double[alphaVectors.length];
for (int alphaId = 0; alphaId < alphaVectors.length; alphaId++) {

val = OP.dotProduct(belief, alphaVectors[alphaId], DP.getStateVarIndices());
val = OP.dotProduct(belief, alphaVectors[alphaId], varIndices);
values[alphaId] = val;

if (val >= bestVal) {
Expand Down
Loading

0 comments on commit 37a6a04

Please sign in to comment.