evaluation.tex

\subtitle{Evaluating Performance}
\date{Tuesday, 15 May 2018}

\begin{document}
	
	\begin{frame}
	\titlepage
	\end{frame}

	% What do we want?
	% What do we measure?
	% How do we measure it?
	
	\begin{frame}{What is Player Experience?}
		\begin{block}{Player experience}
			Collection of events that \textbf{occur} to the player \textbf{during} the game
			\note{Should be clear - it is only the events that occur because of the game that are important}	
		\end{block}
	\end{frame}

	\begin{frame}{What is Player Experience?}
		\begin{block}{Scenario}
			Jeffrey is playing an online RTS game, and he is playing with a friend online against two other people.			
		\end{block}
		\begin{block}{Question}
		Which of these are a part of the player experience and which are not?\note{All happen while the person is playing a game}
		\end{block}	
		\begin{tabularx}{7.5cm}{X | l}
				Losing a Unit & \uncover<2->{Yes} \\
				Laundry Finishing & \uncover<3->{No} \\
				Collecting resource & \uncover<4->{Yes} \\
				New message in chat window & \uncover<5->{Yes} \\
				Unit Moving & \uncover<6->{Yes}
		\end{tabularx}
	\note{\\ Anything that occurs during the game and as part of the game is part of the player experience. Which of these can be detected by an AI?}
	\end{frame}
	
	\section{Metrics}
	
	\begin{frame}
		Collect data on how players/bots work
		
		\begin{block}{Activity}
			What kinds of features can we collect?
		\end{block}
	\end{frame}

	\begin{frame}{Data from humans}
		\begin{itemize}[<+->]
			\item{High-level human experience}
				\begin{itemize}
					\item Final game scores?
					\item \textbf{How} long did they play for?
				\end{itemize}
			\item{Biosignals}
				\begin{itemize}
					\item \textbf{Where} did they look?
					\item Galvanic skin response
					\item BCI
				\end{itemize}
			\item{Surveys and interviews}
				\begin{itemize}
					\item Likert Scales
					\item \textbf{Why} did you feel that way?
				\end{itemize}
		\end{itemize}
	\end{frame}

	\begin{frame}{Data from bots}
	\note{\begin{itemize}}
		\begin{itemize}[<+->]
			\item Internal State
				\begin{itemize}
					\item Will depend on bot \textbf{architecture}
					\item Measure state visits in FSM
					\item Did the game make \textbf{full} use of the AI? \note{\item What does it mean if it doesn't make full use of the AI?}
				\end{itemize}
			\item How many times does a bot face a \textbf{difficult} choice?
				\begin{itemize}
					\item What is a difficult choice? \note{\item Difficult Choice: MCTS - near identical branches, GA - No Convergence} \note{\item What does it signify about the game?}
				\end{itemize}
		\end{itemize}
	\note{\end{itemize}}
	\end{frame}

	\begin{frame}{Data from either}
		\note{Some things can be measured regardless of whether a human or an AI is playing \begin{itemize}}
		\begin{itemize}[<+->]
			\item Final Score distribution\note{\item How high, variation?}
			\item Game Duration \note{\item Length, range of lengths}
			\item Score ``Drama'' \note{\item Runaway victory? Keeps changing hands? Loop?}
			\item Statistical distribution of states \note{\item Some states not used at all? Some overused?}
			\item Degree of challenge \note{\item How to measure this?}
		\end{itemize}
	\note{\end{itemize}}
	\end{frame}

	\begin{frame}{Data from populations}
		\begin{itemize}
			\item Variability of scores
			\item Skill-depth
		\end{itemize}
	\end{frame}


	\section{Action Sequences}

	\begin{frame}{Data from either}
		\begin{itemize}
			\item Actions taken
			\item Record the sequence of button-pushes
		\end{itemize}
	\end{frame}

	\begin{frame}{Entropy}
		\note{\begin{itemize}}
		\begin{itemize}[<+->]
			\item Sometimes used to \textbf{interpret} aspects of player experience
			\begin{itemize}
				\item $H(X) =  \sum_{i=1}^{n} P(x_{i})I(x_{i}) = -\sum_{i=1}^{n}P(x_{i})\log_{2}P(x_{i})$ \note{\item We won't worry too much about the middle definition}
				\item Take a fair coin - how much entropy?
				\item $H(fairCoin) = -\sum_{i=1}^{2}(\frac{1}{2})\log_{2}(\frac{1}{2}) = -\sum_{i=1}^{2}(\frac{1}{2}) \times (-1) = 1 $ \note{\item Because it is a fair coin - each toss can tell us nothing}
				\item How about an unfair coin? What is the entropy for a coin of probability 0.9?
				\note{\item Whiteboard time if students stuck: \begin{itemize}}
					\note{\item $P(x_{1}) = 0.9, P(x_{2}) = 0.1 $}
				\note{\item Answer is: $ H(dodgyCoin) = -\sum_{i=1}^{2}P(x_{i})\log_{2}P(x_{i}) =  $}
				\note{\item Continued: $ -\Big( (P(x_{1}) \log_{2}P(x_{1})) + (P(x_{2}) \log_{2}P(x_{2})) \Big) = 0.47 $}
				\note{\item Continued: $ -\Big( (0.9 \log_{2}0.9) + (0.1 \log_{2}0.1) \Big) = 0.47 $}
				\note{\end{itemize}}
			\end{itemize}
		\end{itemize}
		\begin{center}
			\uncover<6->{\includegraphics[scale=0.4]{entropy}\footnote<6->{Borrowed from \href{https://en.wikipedia.org/wiki/Entropy_(information_theory)}{wikipedia}}} \note{\item Check our answer from earlier matches the diagram}
		\end{center}
		\note{\end{itemize}}
	\end{frame}

	\begin{frame}{A Game Example}
		%\setbeamercovered{invisible}
		\note{\begin{itemize}}
		\begin{columns}
			\note{\item Some sample 2D location visit counts}
			\begin{column}{0.3\textwidth}
				
				\begin{tabularx}{\linewidth}{l | l | l | l}
					 loc & 0 & 1 & 2  \\
					 \hline
					 0 & 10 & 20 & 15 \\
					 1 & 12 & 35 & 13 \\
					 2 & 15 & 20 & 10 \\
				\end{tabularx}
			\end{column}
			\note{\item Converted into visit counts as fraction of total and then into probability of having visited that location}
			\begin{column}{0.7\textwidth}
				\begin{tabularx}{\linewidth}{l | l | l | l}
					loc & visits & p(loc) & calc \\
					\hline
					0,0 & 10 & 0.067 & $0.067\log_{2}(0.067)$\\
					\uncover<2>{
					0,1 & 12 & 0.08 & $0.080\log_{2}(0.080)$ \\
					0,2 & 15 & 0.1 & $0.100\log_{2}(0.100)$ \\
					1,0 & 20 & 0.133 & $0.133\log_{2}(0.133)$ \\
					1,1 & 35 & 0.233 & $0.233\log_{2}(0.233)$ \\
					1,2 & 20 & 0.133 & $0.133\log_{2}(0.133)$ \\
					2,0 & 15 & 0.1 & $0.100\log_{2}(0.100)$ \\
					2,1 & 13 & 0.0867 & $0.0867\log_{2}(0.0867)$ \\
					2,2 & 10 & 0.067 & $0.067\log_{2}(0.067)$ \\
					\hline}
					& 150 & Total: & \\
				\end{tabularx}
			\end{column}
		\note{\item Then we just perform the math as a giant summation. Computers are good at this}
		\note{\item Except computers are not keen on 0's}
		\end{columns}
	\note{\end{itemize}}
	\end{frame}

	\begin{frame}{A Game Example}
		\begin{block}{Exercise}
			Now you try - in Java. Download the code \href{http://ce810.fosslab.uk/slides_pdf/code/Entropy.java}{here} and calculate the entropy
		\end{block}
	\end{frame}


	%% METRICS
	% Simon's raw vs computed metrics.
	
	%% SKILL
	% Evaluating skill depth
	
	\begin{frame}{Skill Ratings}
	\note{\begin{itemize}}
		\begin{itemize}[<+->]
			\item How \textbf{good} is a player? \note{\item And how do we represent this?}
			\item What is the \textbf{issue} with win rates? \note{\item Based on observations, was it enough? Watch F1 at one track and use those observations for another?}
			\item If $A > B$ and $B > C$, is $A > C$? \note{\item Usually this is the case in games}
			\note{\item Do close win rates prove a lack of skill depth? No, the current set of players just doesn't demonstrate it. Like me and Joe playing Pool}
		\end{itemize}
	\note{\end{itemize}}
	\end{frame}	

	\begin{frame}{Elo Ratings}
	\note{\begin{itemize}}
		\begin{itemize}[<+->]
			\item Elo is based on probability \note{\item Designed for chess}
			\item $\mathrm{Elo}(A) - \mathrm{Elo}(B) = P(A \mbox{ beats } B)$ \note{\item Point difference between players denotes the probability of winning}
			\note{\item Advantage of 100 points = 64\% chance of winning
			\item Advantage of 200 points = 76\% chance of winning
			\item Works by taking points from the loser and giving them to the winner. The number transferred is proportional to the difference between points
			}
		\end{itemize}
		\begin{center}
			\only<3>{\includegraphics[scale=0.5]{elo}\footnote{Borrowed from \href{http://liquipedia.net/starcraft/Elo_rating}{liquipedia}}}
		\end{center}
	\note{\end{itemize}}
	\end{frame}

\end{document}