Commit 07b702df authored by Joseph Walton-Rivers

add comments to MCTS variants

parent 6ee9512d
Pipeline #1422 passed with stages
in 2 minutes 39 seconds
......@@ -9,7 +9,9 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* A version of the MCTS agent that replaces the random rollout with policy based rollouts.
* A version of the MCTS agent that replaces the random rollout with policy based roll-outs.
*
* This variant uses standard MCTS for all agents' moves in the tree, and then uses the policy for roll-outs.
*/
public class MCTSPolicy extends MCTS {
private final Logger LOG = LoggerFactory.getLogger(MCTSPolicy.class);
......@@ -33,9 +35,11 @@ public class MCTSPolicy extends MCTS {
/**
* Rather than perform a random move, query a policy for one.
*
* @param state
* @param playerID
* @return
* Consult the policy provided when creating the agent for all agents' moves.
*
* @param state the current game state
* @param playerID the current player ID
* @return the move that the policy has selected
*/
@Override
protected Action selectActionForRollout(GameState state, int playerID) {
......
......@@ -16,7 +16,11 @@ import java.util.Arrays;
import java.util.List;
/**
* Created by WebPigeon on 09/08/2016.
* Use MCTS for our nodes, and policies for other players.
*
* This agent will use UCT for our nodes, but paired agents will consult the policy rather than performing a standard
* expansion. This is often quicker than performing a complete expansion of the nodes and will take into account what
* the paired agent will do rather than settling on the highest scoring nodes for them.
*/
public class MCTSPredictor extends MCTS {
protected Agent[] agents;
......
......@@ -12,7 +12,10 @@ import java.util.List;
import java.util.Random;
/**
* Created by webpigeon on 27/01/17.
* A filter agent which will consult its policy with 1-threshold probability, e.g. 0.4 -> execute policy 60% of the time,
* execute a random move otherwise.
*
* This was created to test the agent's ability to work with noisy/inaccurate policies.
*/
public class NoisyPredictor implements Agent {
private double threshold;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment