airbus · fteicht · May 17, 2024 · May 5, 2024 · May 6, 2024 · May 6, 2024
diff --git a/cpp/src/hub/solver/aostar/aostar.hh b/cpp/src/hub/solver/aostar/aostar.hh
@@ -9,12 +9,24 @@
 #include <memory>
 #include <unordered_set>
 #include <list>
+#include <queue>
 
 #include "utils/associative_container_deducer.hh"
 #include "utils/execution.hh"
 
 namespace skdecide {
 
+/**
+ * @brief This is the skdecide implementation of the AO* algorithm for searching
+ * cost-minimal policies in additive AND/OR graphs with admissible heuristics
+ * as described in "Principles of Artificial Intelligence" by Nilsson, N. (1980)
+ *
+ * @tparam Tdomain Type of the domain class
+ * @tparam Texecution_policy Type of the execution policy (one of
+ * 'SequentialExecution' to generate state-action transitions in sequence,
+ * or 'ParallelExecution' to generate state-action transitions in parallel on
+ * different threads)
+ */
 template <typename Tdomain, typename Texecution_policy = SequentialExecution>
 class AOStarSolver {
 public:
@@ -25,31 +37,146 @@ public:
   typedef typename Domain::Value Value;
   typedef Texecution_policy ExecutionPolicy;
 
+  typedef std::function<Predicate(Domain &, const State &)> GoalCheckerFunctor;
+  typedef std::function<Value(Domain &, const State &)> HeuristicFunctor;
+  typedef std::function<bool(const AOStarSolver &, Domain &)> CallbackFunctor;
+
+  /**
+   * @brief Construct a new AOStarSolver object
+   *
+   * @param domain The domain instance
+   * @param goal_checker Functor taking as arguments the domain and a state
+   * object, and returning true if the state is the goal
+   * @param heuristic Functor taking as arguments the domain and a state object,
+   * and returning the heuristic estimate from the state to the goal
+   * @param discount Value function's discount factor
+   * @param max_tip_expansions Maximum number of states to extract from the
+   * priority queue at each iteration before recomputing the policy graph
+   * @param detect_cycles Boolean indicating whether cycles in the search graph
+   * should be automatically detected (true) or not (false), knowing that the
+   * AO* algorithm is not meant to work with graph cycles into which it might be
+   * infinitely trapped
+   * @param callback Functor called before popping the next state from the
+   * priority queue, taking as arguments the solver and the domain, and
+   * returning true if the solver must be stopped
+   * @param verbose Boolean indicating whether verbose messages should be
+   * logged (true) or not (false)
+   */
   AOStarSolver(
-      Domain &domain,
-      const std::function<Predicate(Domain &, const State &)> &goal_checker,
-      const std::function<Value(Domain &, const State &)> &heuristic,
-      double discount = 1.0, std::size_t max_tip_expansions = 1,
-      bool detect_cycles = false, bool debug_logs = false);
-
-  // clears the solver (clears the search graph, thus preventing from reusing
-  // previous search results)
+      Domain &domain, const GoalCheckerFunctor &goal_checker,
+      const HeuristicFunctor &heuristic, double discount = 1.0,
+      std::size_t max_tip_expansions = 1, bool detect_cycles = false,
+      const CallbackFunctor &callback = [](const AOStarSolver &,
+                                           Domain &) { return false; },
+      bool verbose = false);
+
+  /**
+   * @brief Clears the search graph, thus preventing from reusing previous
+   * search results)
+   *
+   */
   void clear();
 
-  // solves from state s using heuristic function h
+  /**
+   * @brief Call the AO* algorithm
+   *
+   * @param s Root state of the search from which AO* graph traversals are
+   * performed
+   */
   void solve(const State &s);
+
+  /**
+   * @brief Indicates whether the solution policy is defined for a given state
+   *
+   * @param s State for which an entry is searched in the policy graph
+   * @return true If the state has been explored and an action is defined in
+   * this state
+   * @return false If the state has not been explored or no action is defined in
+   * this state
+   */
   bool is_solution_defined_for(const State &s) const;
+
+  /**
+   * @brief Get the best computed action in terms of best Q-value in a given
+   * state (throws a runtime error exception if no action is defined in the
+   * given state, which is why it is advised to call
+   * AOStarSolver::is_solution_defined_for before).
+   *
+   * @param s State for which the best action is requested
+   * @return const Action& Best computed action
+   */
   const Action &get_best_action(const State &s) const;
-  const double &get_best_value(const State &s) const;
+
+  /**
+   * @brief Get the best Q-value in a given state (throws a runtime
+   * error exception if no action is defined in the given state, which is why it
+   * is advised to call AOStarSolver::is_solution_defined_for before)
+   *
+   * @param s State from which the best Q-value is requested
+   * @return double Minimum Q-value of the given state over the applicable
+   * actions in this state
+   */
+  Value get_best_value(const State &s) const;
+
+  /**
+   * @brief Get the number of states present in the search graph
+   *
+   * @return std::size_t Number of states present in the search graph
+   */
+  std::size_t get_nb_explored_states() const;
+
+  /**
+   * @brief Get the set of states present in the search graph (i.e. the graph's
+   * state nodes minus the nodes' encapsulation and their children)
+   *
+   * @return SetTypeDeducer<State>::Set Set of states present in the search
+   * graph
+   */
+  typename SetTypeDeducer<State>::Set get_explored_states() const;
+
+  /**
+   * @brief Get the number of states present in the priority queue (i.e. those
+   * explored states that have not been yet expanded)
+   *
+   * @return std::size_t Number of states present in the priority queue
+   */
+  std::size_t get_nb_tip_states() const;
+
+  /**
+   * @brief Get the top tip state, i.e. the tip state with the lowest value
+   * function
+   *
+   * @return const State& Next tip state to be expanded by the algorithm
+   */
+  const State &get_top_tip_state() const;
+
+  /**
+   * @brief Get the solving time in milliseconds since the beginning of the
+   * search from the root solving state
+   *
+   * @return std::size_t Solving time in milliseconds
+   */
+  std::size_t get_solving_time() const;
+
+  /**
+   * @brief Get the (partial) solution policy defined for the states for which
+   * the Q-value has been updated at least once (which is optimal for the
+   * non-tip states reachable by this policy
+   *
+   * @return Mapping from states to pairs of action and best Q-value
+   */
+  typename MapTypeDeducer<State, std::pair<Action, Value>>::Map
+  get_policy() const;
 
 private:
   Domain &_domain;
-  std::function<bool(Domain &, const State &)> _goal_checker;
-  std::function<Value(Domain &, const State &)> _heuristic;
+  GoalCheckerFunctor _goal_checker;
+  HeuristicFunctor _heuristic;
   double _discount;
   std::size_t _max_tip_expansions;
   bool _detect_cycles;
-  bool _debug_logs;
+  CallbackFunctor _callback;
+  bool _verbose;
   ExecutionPolicy _execution_policy;
 
   struct ActionNode;
@@ -85,6 +212,13 @@ private:
 
   typedef typename SetTypeDeducer<StateNode, State>::Set Graph;
   Graph _graph;
+
+  typedef std::priority_queue<StateNode *, std::vector<StateNode *>,
+                              StateNodeCompare>
+      PriorityQueue;
+  PriorityQueue _priority_queue;
+
+  std::chrono::time_point<std::chrono::high_resolution_clock> _start_time;
 };
 
 } // namespace skdecide