Commit d60230c5 authored by Joseph Walton-Rivers's avatar Joseph Walton-Rivers 🐦

Merge branch 'feature_comp2019' into 'master'

2019 Competition - Changes for Mixed and Mirror tracks

See merge request !6
parents 1c41095f b8bf7dd7
Pipeline #2160 passed with stages
in 5 minutes and 12 seconds
......@@ -5,4 +5,30 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [Unreleased]
\ No newline at end of file
## v1.2.4 (unreleased)
### Added
- New history features allow for easier processing of game events ( issue #6 )
- GetMovesLeft is now visible ( issue #7 )
- Allow overwriting the default policy for production rule agents ( issue #9 )
- Better documentation for the state interface
### Changes
- Agents no longer need to manage history when forwarding the state; this is now handled by the action
- The copy constructor is no longer visible for BasicState, instead the getCopy method should be used
- The game will handle calling tick for the agents; calling tick will now result in a warning message being printed.
### Fixes
- Fix spelling of information in state interface
- Cloning a 'no life state' should result in a no-life state
### Removed
- addEvent method has been removed as it is not compatible with the new history system; getHistory has been ported
- Usage of deprecated methods has been reduced
- Old experiments MixedAgentGame and RiskyRunner have been deprecated
- human UI no longer outputs the current turn number to terminal
## v1.2.3
### Added
- ZeroLife state which matches the suggestion by DeepMind with regards to the handling of no lives left
......@@ -14,6 +14,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.*;
import java.util.stream.Collectors;
/**
* A basic runner for the game of Hanabi.
......@@ -114,20 +115,30 @@ public class GameRunner {
state.init(seed);
//let the players know the game has started and the starting state
send(new GameInformation(nPlayers, HAND_SIZE[nPlayers], state.getInfomation(), state.getLives()));
//keep track of the messages that should be sent as part of the game setup
List<GameEvent> initEvents = new ArrayList<>();
//tell players about their hands
// tell the players the rules
GameEvent gameInfo = new GameInformation(nPlayers, HAND_SIZE[nPlayers], state.getInfomation(), state.getLives());
initEvents.add(gameInfo);
//tell players about the initial state
for (int player = 0; player < players.length; player++) {
Hand hand = state.getHand(player);
for (int slot = 0; slot < hand.getSize(); slot++) {
Card cardInSlot = hand.getCard(slot);
send(new CardDrawn(player, slot, cardInSlot.colour, cardInSlot.value));
send(new CardReceived(player, slot, state.getDeck().hasCardsLeft()));
GameEvent cardDrawn = new CardDrawn(player, slot, cardInSlot.colour, cardInSlot.value, 0);
GameEvent cardRecv = new CardReceived(player, slot, state.getDeck().hasCardsLeft(), 0);
initEvents.add(cardDrawn);
initEvents.add(cardRecv);
}
}
//dispatch the events to the players
notifyAction(-2, null, initEvents);
long endTime = getTick();
logger.info("Game init complete: took {} ms", endTime - startTime);
}
......@@ -169,8 +180,10 @@ public class GameRunner {
//perform the action and get the effects
logger.info("player {} made move {} as turn {}", nextPlayer, action, moves);
moves++;
Collection<GameEvent> events = action.apply(nextPlayer, state);
events.forEach(this::send);
notifyAction(nextPlayer, action, events);
//make sure it's the next player's turn
nextPlayer = (nextPlayer + 1) % players.length;
......@@ -193,7 +206,6 @@ public class GameRunner {
while (!state.isGameOver()) {
try {
state.tick();
writeState(state);
nextMove();
} catch (RulesViolation rv) {
......@@ -215,24 +227,52 @@ public class GameRunner {
}
/**
 * Dispatch the outcome of an action to every player.
 *
 * Each player is handed only those events that report themselves as visible to that player,
 * in the order they appear in the supplied collection.
 *
 * @param actor the player who performed the action
 * @param action the action the player performed
 * @param events the events that resulted from that action
 */
protected void notifyAction(int actor, Action action, Collection<GameEvent> events) {
    for (int playerIndex = 0; playerIndex < players.length; playerIndex++) {
        // collect only the events this player is allowed to see
        List<GameEvent> seen = new ArrayList<>();
        for (GameEvent candidate : events) {
            if (candidate.isVisibleTo(playerIndex)) {
                seen.add(candidate);
            }
        }

        players[playerIndex].resolveTurn(actor, action, seen);
        logger.debug("for {}, sent {} to {}", action, seen, playerIndex);
    }
}
//send messages as soon as they are available
protected void send(GameEvent event) {
/* protected void send(GameEvent event) {
logger.debug("game sent event: {}", event);
for (int i = 0; i < players.length; i++) {
if (event.isVisibleTo(i)) {
players[i].sendMessage(event);
}
}
}
}*/
public static void main(String[] args) {
Random random = new Random();
List<GameStats> results = new ArrayList<>();
for(int players = 2; players <= 5; players++){
for(int gameNumber = 0; gameNumber < 1000; gameNumber++){
GameRunner runner = new GameRunner("IGGI2-" + gameNumber, players);
for(int gameNumber = 0; gameNumber < 10; gameNumber++){
GameRunner runner = new GameRunner("IGGI2-" + gameNumber, players, true);
int evalAgent = random.nextInt(players);
for(int i = 0; i < players; i++){
runner.addPlayer(new AgentPlayer("IGGI2", AgentUtils.buildAgent("iggi2")));
if (evalAgent == i) {
runner.addPlayer(new AgentPlayer("eval", AgentUtils.buildAgent("pmctsND[iggi|iggi|iggi|iggi|iggi]")));
} else{
runner.addPlayer(new AgentPlayer("iggi", AgentUtils.buildAgent("iggi")));
}
}
GameStats stats = runner.playGame(random.nextLong());
......
package com.fossgalaxy.games.fireworks;
import com.fossgalaxy.games.fireworks.state.actions.Action;
import com.fossgalaxy.games.fireworks.state.events.CardDrawn;
import com.fossgalaxy.games.fireworks.state.events.CheatEvent;
import com.fossgalaxy.games.fireworks.state.events.GameEvent;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
/**
* Game runner that allows the agents to know what cards are in their hands.
*/
......@@ -14,24 +19,30 @@ public class GameRunnerCheat extends GameRunner {
}
/**
* When a card is drawn, all agents get told what the card was.
*
* This allows the agents to know exactly what cards are in their own hand and tells the other agents about it.
* This is implemented as the equivalent of a free tell action.
* Tell the players about an action that has occurred
*
* @param event the event to send to the agents.
* @param actor the player who performed the action
* @param action the action the player performed
* @param events the events that resulted from that action
*/
@Override
protected void send(GameEvent event) {
protected void notifyAction(int actor, Action action, Collection<GameEvent> events) {
for (int i = 0; i < players.length; i++) {
if (event.isVisibleTo(i)) {
players[i].sendMessage(event);
}
// filter events to just those that are visible to the player
List<GameEvent> visibleEvents = new ArrayList<>();
for (GameEvent event : events) {
if (event.isVisibleTo(i)) {
visibleEvents.add(event);
}
if (event instanceof CardDrawn) {
send(new CheatEvent(i, state.getHand(i)));
if (event instanceof CardDrawn) {
visibleEvents.add(new CheatEvent(i, state.getHand(i), event.getTurnNumber()));
}
}
players[i].resolveTurn(actor, action, visibleEvents);
}
}
}
......@@ -11,6 +11,7 @@ import java.util.Random;
* <p>
* This runner paired with
*/
@Deprecated
public class MixedAgentGame {
//utility class - no instances required.
......
......@@ -9,6 +9,7 @@ import java.util.Random;
/**
* Created by piers on 08/11/16.
*/
@Deprecated
public class RiskyRunner {
public static final String IGGI_RISKY = "iggi_risky";
......
......@@ -6,6 +6,7 @@ import com.fossgalaxy.games.fireworks.state.GameState;
import com.fossgalaxy.games.fireworks.state.actions.Action;
import com.fossgalaxy.games.fireworks.state.events.GameEvent;
import java.util.List;
import java.util.Objects;
/**
......@@ -41,13 +42,32 @@ public class AgentPlayer implements Player {
return policy.doMove(playerID, state);
}
/**
* Resolve a turn.
*
* We don't know enough to apply the action (i.e. the deck order and our own cards).
* Instead, we use the effects of the action (Events) to update our state.
* Because the action is never executed on our side, our tick counter won't update, so do that too.
*
* @param actor the player who just made a move
* @param action the move the player made
* @param events what we saw happen when the move was made
*/
@Override
public void sendMessage(GameEvent msg) {
assert state != null : "You didn't call setID before I got a message!";
assert msg != null : "You passed me a null message";
public void resolveTurn(int actor, Action action, List<GameEvent> events) {
Objects.requireNonNull(state);
Objects.requireNonNull(events);
// add the action to the history
state.addAction(actor, action, events);
// apply the effects of the actions
for (GameEvent event : events){
event.apply(state, this.playerID);
}
msg.apply(state, playerID);
state.addEvent(msg);
// tick the state
state.actionTick();
}
@Override
......
......@@ -76,7 +76,6 @@ public class Individual {
Action myAction = possible.get(random.nextInt(possible.size()));
myAction.apply(playerId, forward);
}
forward.tick();
playerId = (playerId + 1) % state.getPlayerCount();
}
......
......@@ -295,14 +295,14 @@ public class HatGuessing implements Agent {
switch (event.getEvent()) {
case CARD_INFO_COLOUR: {
CardInfoColour tellColour = (CardInfoColour) event;
int recommendation = 4 + getEncodedValue(tellColour.getPerformer(), tellColour.getPlayerId());
int recommendation = 4 + getEncodedValue(tellColour.getPerformer(), tellColour.getPlayerTold());
lastToldAction = Recommendation.values()[getMissingPiece(state, tellColour.getPerformer(), recommendation)];
cardsPlayedSinceHint = 0;
break;
}
case CARD_INFO_VALUE: {
CardInfoValue tellValue = (CardInfoValue) event;
int recommendation = getEncodedValue(tellValue.getPerformer(), tellValue.getPlayerId());
int recommendation = getEncodedValue(tellValue.getPerformer(), tellValue.getPlayerTold());
lastToldAction = Recommendation.values()[getMissingPiece(state, tellValue.getPerformer(), recommendation)];
cardsPlayedSinceHint = 0;
break;
......
......@@ -23,6 +23,21 @@ public class IGGIFactory {
}
/**
 * Build an agent that only ever tells or discards at random.
 *
 * The agent consists of two rules, TellRandomly followed by DiscardRandomly, so it will never
 * play cards. It is intended to always be able to return a move.
 *
 * @return the old default policy for the production rule agent.
 */
@AgentBuilderStatic("forgiving")
public static Agent buildForgivingPolicy(){
    ProductionRuleAgent forgiving = new ProductionRuleAgent();
    // rule order matters: telling is tried before discarding
    forgiving.addRule(new TellRandomly());
    forgiving.addRule(new DiscardRandomly());
    return forgiving;
}
/**
* Cautious but helpful bot.
* <p>
......
......@@ -25,6 +25,7 @@ public class MCTS implements Agent {
public static final int DEFAULT_ROLLOUT_DEPTH = 18;
public static final int DEFAULT_TREE_DEPTH_MUL = 1;
public static final int NO_LIMIT = 100;
protected static final boolean OLD_UCT_BEHAVIOUR = false;
protected final int roundLength;
protected final int rolloutDepth;
......@@ -111,7 +112,7 @@ public class MCTS implements Agent {
deck.shuffle();
MCTSNode current = select(root, currentState, iterationObject);
int score = rollout(currentState, agentID, current);
int score = rollout(currentState, current);
current.backup(score);
if(calcTree){
System.err.println(root.printD3());
......@@ -144,16 +145,20 @@ public class MCTS implements Agent {
protected MCTSNode select(MCTSNode root, GameState state, IterationObject iterationObject) {
MCTSNode current = root;
int treeDepth = calculateTreeDepthLimit(state);
while (!state.isGameOver() && current.getDepth() < treeDepth) {
boolean expandedNode = false;
while (!state.isGameOver() && current.getDepth() < treeDepth && !expandedNode) {
MCTSNode next;
if (current.fullyExpanded(state)) {
next = current.getUCTNode(state);
} else {
next = expand(current, state);
return next;
expandedNode = true;
}
if (next == null) {
//XXX if all follow on states explored so far are null, we are now a leaf node
//ok to early return here - we will have applied current last time round the loop!
return current;
}
current = next;
......@@ -164,9 +169,7 @@ public class MCTS implements Agent {
Action action = current.getAction();
if (action != null) {
List<GameEvent> events = action.apply(agent, state);
events.forEach(state::addEvent);
state.tick();
action.apply(agent, state);
}
if (iterationObject.isMyGo(agent)) {
......@@ -236,16 +239,13 @@ public class MCTS implements Agent {
return listAction.get(0);
}
protected int rollout(GameState state, final int agentID, MCTSNode current) {
int playerID = agentID;
protected int rollout(GameState state, MCTSNode current) {
int playerID = (current.getAgent() + 1) % state.getPlayerCount();
int moves = 0;
while (!state.isGameOver() && moves < rolloutDepth) {
Action action = selectActionForRollout(state, playerID);
List<GameEvent> events = action.apply(playerID, state);
events.forEach(state::addEvent);
state.tick();
action.apply(playerID, state);
playerID = (playerID + 1) % state.getPlayerCount();
moves++;
}
......
......@@ -9,7 +9,9 @@ import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.stream.Collectors;
......@@ -36,7 +38,9 @@ public class MCTSNode {
private double score;
private int visits;
private int parentWasVisitedAndIWasLegal;
private int parentWasVisitedAndIWasLegalOld;
protected Map<Action, Integer> legalChildVisits;
protected final StatsSummary rolloutScores;
protected final StatsSummary rolloutMoves;
......@@ -73,6 +77,8 @@ public class MCTSNode {
this.random = new Random();
this.depth = (parent == null) ? 0 : parent.depth + 1;
this.legalChildVisits = new HashMap<>();
this.rolloutScores = new BasicStats();
this.rolloutMoves = new BasicStats();
......@@ -89,7 +95,8 @@ public class MCTSNode {
return 0;
}
return ((score / MAX_SCORE) / visits) + (expConst * Math.sqrt(Math.log(parentWasVisitedAndIWasLegal) / visits));
int legalVisits = MCTS.OLD_UCT_BEHAVIOUR ? parentWasVisitedAndIWasLegalOld : parent.legalChildVisits.get(moveToState);
return ((score / MAX_SCORE) / visits) + (expConst * Math.sqrt(Math.log(legalVisits) / visits));
}
public List<MCTSNode> getChildren() {
......@@ -123,7 +130,10 @@ public class MCTSNode {
if (!moveToMake.isLegal(child.agentId, state)) {
continue;
}
child.parentWasVisitedAndIWasLegal++;
child.parentWasVisitedAndIWasLegalOld++;
updateVisitCount(moveToMake);
double childScore = child.getUCTValue() + (random.nextDouble() * EPSILON);
if (childScore > bestScore) {
......@@ -131,9 +141,22 @@ public class MCTSNode {
bestChild = child;
}
}
//now, update all children we haven't expanded yet, but we could have done
int nextPlayer = (getAgent() + 1) % state.getPlayerCount();
for (Action unexpandedAction : allUnexpandedActions) {
if (unexpandedAction.isLegal(nextPlayer, state)) {
updateVisitCount(unexpandedAction);
}
}
return bestChild;
}
/**
 * Increment the counter kept in legalChildVisits for the given action.
 *
 * @param action the action whose counter is incremented
 */
protected void updateVisitCount(Action action) {
    // an absent entry counts as zero, so the first update stores 1
    legalChildVisits.merge(action, 1, Integer::sum);
}
public int getAgent() {
return agentId;
......
......@@ -76,8 +76,9 @@ public class MCTSPredictor extends MCTS {
protected MCTSNode select(MCTSNode root, GameState state, IterationObject iterationObject) {
MCTSNode current = root;
int treeDepth = calculateTreeDepthLimit(state);
boolean expanded = false;
while (!state.isGameOver() && current.getDepth() < treeDepth) {
while (!state.isGameOver() && current.getDepth() < treeDepth && !expanded) {
MCTSNode next;
if (current.fullyExpanded(state)) {
next = current.getUCTNode(state);
......@@ -92,11 +93,14 @@ public class MCTSPredictor extends MCTS {
if (numChildren != current.getChildSize()) {
// It is new
return next;
expanded = true;
//return next;
}
}
// Forward the state
if (next == null) {
//ok to early return, current advanced in last game tick
return current;
}
current = next;
......@@ -106,9 +110,7 @@ public class MCTSPredictor extends MCTS {
int agent = current.getAgent();
Action action = current.getAction();
if (action != null) {
List<GameEvent> events = action.apply(agent, state);
events.forEach(state::addEvent);
state.tick();
action.apply(agent, state);
}
if (iterationObject.isMyGo(agent)) {
......
......@@ -129,13 +129,16 @@ public class MCTSExpConst implements Agent {
protected MCTSNode select(MCTSNode root, GameState state, IterationObject iterationObject) {
MCTSNode current = root;
int treeDepth = calculateTreeDepthLimit(state);
while (!state.isGameOver() && current.getDepth() < treeDepth) {
boolean expanded = false;
while (!state.isGameOver() && current.getDepth() < treeDepth && !expanded) {
MCTSNode next;
if (current.fullyExpanded(state)) {
next = current.getUCTNode(state);
} else {
next = expand(current, state);
return next;
expanded = true;
//return next;
}
if (next == null) {
//XXX if all follow on states explored so far are null, we are now a leaf node
......@@ -149,9 +152,7 @@ public class MCTSExpConst implements Agent {
Action action = current.getAction();
if (action != null) {
List<GameEvent> events = action.apply(agent, state);
events.forEach(state::addEvent);
state.tick();
action.apply(agent, state);
}
if (iterationObject.isMyGo(agent)) {
......@@ -228,9 +229,7 @@ public class MCTSExpConst implements Agent {
while (!state.isGameOver() && moves < rolloutDepth) {
Action action = selectActionForRollout(state, playerID);
List<GameEvent> events = action.apply(playerID, state);
events.forEach(state::addEvent);
state.tick();
action.apply(playerID, state);
playerID = (playerID + 1) % state.getPlayerCount();
moves++;
}
......
......@@ -73,8 +73,9 @@ public class MCTSPredictorExpConst extends MCTSExpConst {
protected MCTSNode select(MCTSNode root, GameState state, IterationObject iterationObject) {
MCTSNode current = root;
int treeDepth = calculateTreeDepthLimit(state);
boolean expanded = false;
while (!state.isGameOver() && current.getDepth() < treeDepth) {
while (!state.isGameOver() && current.getDepth() < treeDepth && !expanded) {
MCTSNode next;
if (current.fullyExpanded(state)) {
next = current.getUCTNode(state);
......@@ -89,7 +90,8 @@ public class MCTSPredictorExpConst extends MCTSExpConst {
if (numChildren != current.getChildSize()) {
// It is new
return next;
expanded = true;
//return next;
}
}
// Forward the state
......@@ -103,9 +105,7 @@ public class MCTSPredictorExpConst extends MCTSExpConst {
int agent = current.getAgent();
Action action = current.getAction();
if (action != null) {
List<GameEvent> events = action.apply(agent, state);
events.forEach(state::addEvent);
state.tick();
action.apply(agent, state);
}
if (iterationObject.isMyGo(agent)) {
......
......@@ -16,15 +16,32 @@ public class ProductionRuleAgent implements Agent {
private final Logger logger = LoggerFactory.getLogger(ProductionRuleAgent.class);
protected List<Rule> rules;
protected Agent defaultPolicy;
/**
 * Create an agent with an empty rule list and no default policy.
 */
public ProductionRuleAgent() {
    rules = new ArrayList<>();
    defaultPolicy = null;
}
/**
 * Append a rule to the end of this agent's rule list.
 *
 * @param rule the rule to add
 */
public void addRule(Rule rule) {
    this.rules.add(rule);
}
/**
 * Set the policy of last resort, consulted when no rule fired.
 *
 * With a null policy the agent throws an IllegalStateException when the ruleset is incomplete.
 * With a non-null policy, that policy is asked for a move instead. To get the old 'forgiving'
 * behaviour, pass IGGIFactory.buildForgivingPolicy().
 *
 * Callers supplying a policy must guarantee that it is complete, i.e. that it always returns a move.
 *
 * @param policy the policy of last resort for the agent.
 */
public void setDefaultPolicy(Agent policy){
    defaultPolicy = policy;
}
@Override
public Action doMove(int agentID, GameState state) {
......@@ -45,7 +62,18 @@ public class ProductionRuleAgent implements Agent {
//fallback when no rule fired: delegate to the default policy if one is set, otherwise fail
public Action doDefaultBehaviour(int playerID, GameState state) {
throw new IllegalStateException("No rule fired - your rules are incomplete.");
if (defaultPolicy == null) {
throw new IllegalStateException("No rule fired - your rules are incomplete.");
} else {
Action defaultAction = defaultPolicy.doMove(playerID, state);
if (defaultAction == null) {
//hey! that's not allowed!
throw new IllegalStateException("Default policy failed to return an action.");
}
return defaultAction;
}
}
@Override
......
......@@ -40,7 +40,7 @@ class FirstMoveGameRunner extends GameRunner {
@Override
public GameStats playGame(Long seed) {
init(seed);
state.tick();
//state.tick();
nextMove();
return null;
......
......@@ -11,6 +11,7 @@ import com.fossgalaxy.games.fireworks.state.events.GameEvent;
import com.fossgalaxy.games.fireworks.state.events.MessageType;
import javax.swing.*;
import java.util.List;
/**
* Created by webpigeon on 11/04/17.
......@@ -67,9 +68,13 @@ public class UIPlayer extends AgentPlayer {
}
@Override
public void sendMessage(GameEvent msg) {
super.sendMessage(msg);
public void resolveTurn(int actor, Action action, List<GameEvent> events) {
super.resolveTurn(actor, action, events);
events.forEach(this::sendMessage);
}
public void sendMessage(GameEvent msg) {
if (view != null) {
if (msg.getEvent().equals(MessageType.CARD_INFO_COLOUR)) {
......
......@@ -262,7 +262,7 @@ public class PrettyGameView extends GameView {
public void animateTell(CardInfoValue valueTold) {
super.animateTell(valueTold);
CardHinter hinter