EPV_demo.tex

\documentclass{article}\usepackage[]{graphicx}\usepackage[]{color}
% maxwidth is the original width if it is less than linewidth
% otherwise use linewidth (to make sure the graphics do not exceed the margin)
%
% Used in this document as \includegraphics[width=\maxwidth]{...} for every
% knitr figure, so a plot is never scaled up but is shrunk to fit the line.
% \Gin@nat@width is the internal length holding the graphic's original
% (natural) width; its name contains @, hence the \makeatletter/\makeatother
% pair around the definition.
\makeatletter
\def\maxwidth{ %
  \ifdim\Gin@nat@width>\linewidth
    \linewidth
  \else
    \Gin@nat@width
  \fi
}
\makeatother

% Default text colour for code chunks: every knitrout block in this document
% switches to it with \color{fgcolor} before opening its kframe.
\definecolor{fgcolor}{rgb}{0.345, 0.345, 0.345}
% Syntax-highlighting macros wrapped around the tokens of the R listings.
% Each one only sets a colour (and in some cases a font shape) for its argument.
% The roles below are as observed in the listings of this document:
%   \hlnum  numeric literals, e.g. \hlnum{1800}
%   \hlstr  string literals, e.g. \hlstr{"./code"}
%   \hlcom  comments (also set in italics)
%   \hlopt  operators such as : $ == and matrix multiplication
%   \hlstd  ordinary code text (identifiers, parentheses, commas)
%   \hlkwa  language keywords for / in / function (also set in bold)
%   \hlkwb  the assignment arrow <-
%   \hlkwc  argument names in calls, e.g. \hlkwc{file}
%   \hlkwd  names of called functions, e.g. \hlkwd{read.csv} (also set in bold)
\newcommand{\hlnum}[1]{\textcolor[rgb]{0.686,0.059,0.569}{#1}}%
\newcommand{\hlstr}[1]{\textcolor[rgb]{0.192,0.494,0.8}{#1}}%
\newcommand{\hlcom}[1]{\textcolor[rgb]{0.678,0.584,0.686}{\textit{#1}}}%
\newcommand{\hlopt}[1]{\textcolor[rgb]{0,0,0}{#1}}%
\newcommand{\hlstd}[1]{\textcolor[rgb]{0.345,0.345,0.345}{#1}}%
\newcommand{\hlkwa}[1]{\textcolor[rgb]{0.161,0.373,0.58}{\textbf{#1}}}%
\newcommand{\hlkwb}[1]{\textcolor[rgb]{0.69,0.353,0.396}{#1}}%
\newcommand{\hlkwc}[1]{\textcolor[rgb]{0.333,0.667,0.333}{#1}}%
\newcommand{\hlkwd}[1]{\textcolor[rgb]{0.737,0.353,0.396}{\textbf{#1}}}%
% \hlipl is an alias of \hlkwb; it is not used in the visible part of this file.
\let\hlipl\hlkwb

\usepackage{framed}
% kframe: the shaded box that surrounds each chunk's source and text output.
% It is built on \MakeFramed/\endMakeFramed and \FrameCommand (framed package),
% with the background drawn by \colorbox in the colour shadecolor.
% Internal names containing @ are used, hence \makeatletter/\makeatother.
\makeatletter
\newenvironment{kframe}{%
 % Hook run by the end code; empty unless a minipage is opened below.
 \def\at@end@of@kframe{}%
 % In inner horizontal mode, wrap the frame in a minipage of width
 % \columnwidth and arrange for the end code to close it again.
 \ifinner\ifhmode%
  \def\at@end@of@kframe{\end{minipage}}%
  \begin{minipage}{\columnwidth}%
 \fi\fi%
 % Frame drawing: shift right by \@totalleftmargin, cancel the \fboxsep
 % padding that \colorbox adds on each side, then skip to the right edge.
 \def\FrameCommand##1{\hskip\@totalleftmargin \hskip-\fboxsep
 \colorbox{shadecolor}{##1}\hskip-\fboxsep
     % There is no \\@totalrightmargin, so:
     \hskip-\linewidth \hskip-\@totalleftmargin \hskip\columnwidth}%
 % Settings for the framed contents: reduce \hsize by \width, reset the
 % left margin to zero and make \linewidth equal to the new \hsize.
 \MakeFramed {\advance\hsize-\width
   \@totalleftmargin\z@ \linewidth\hsize
   \@setminipage}}%
 % End code: finish the paragraph, close the frame, then run the hook.
 {\par\unskip\endMakeFramed%
 \at@end@of@kframe}
\makeatother

% Background fill used by kframe's \colorbox. This is only the default: each
% knitrout block in this document redefines shadecolor locally before its kframe.
\definecolor{shadecolor}{rgb}{.97, .97, .97}
% Colours for diagnostic text in chunk output. errorcolor is applied to the
% "## Error in ..." lines of this document; messagecolor and warningcolor are
% not referenced in the visible part of the file (presumably for R messages
% and warnings -- confirm against the knitr templates).
\definecolor{messagecolor}{rgb}{0, 0, 0}
\definecolor{warningcolor}{rgb}{1, 0, 1}
\definecolor{errorcolor}{rgb}{1, 0, 0}
% Wrapper around every chunk (source, output and figures); it does nothing by
% default and exists so that it can be restyled.
\newenvironment{knitrout}{}{} % an empty environment to be redefined in TeX

\usepackage{alltt}
\usepackage{fullpage}
\usepackage{booktabs}
\usepackage{amsthm,amsmath,amssymb}
\usepackage{float}
\RequirePackage{natbib}
\usepackage{graphicx}

\title{EPV Demo \\
\Large
Supplement to ``A Multiresolution Stochastic Process Model for Predicting Basketball Possession Outcomes''}
\author{Daniel Cervone, Alex D'Amour, Luke Bornn and Kirk Goldsberry}
\date{}
\IfFileExists{upquote.sty}{\usepackage{upquote}}{}
\begin{document}

\maketitle


This document provides a demonstration of the code, methodology, and inferential results for the EPV model discussed in our paper.

\section{Loading the data}

To begin, we must first set the directories containing the supplemental data and code, and install/load all necessary packages.

\begin{knitrout}\footnotesize
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{code.dir} \hlkwb{<-} \hlstr{"./code"}
\hlstd{data.dir} \hlkwb{<-} \hlstr{"./data"}
\end{alltt}
\end{kframe}
\end{knitrout}

Now we load the \texttt{csv} file containing a full game of optical tracking data. As mentioned in the paper, data from this game was not used in parameter inference for any model related to EPV.

\begin{knitrout}\footnotesize
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{dat} \hlkwb{<-} \hlkwd{read.csv}\hlstd{(}\hlkwc{file}\hlstd{=}\hlkwd{sprintf}\hlstd{(}\hlstr{"%s/2013_11_01_MIA_BKN.csv"}\hlstd{, data.dir))}
\end{alltt}
\end{kframe}
\end{knitrout}

Each row of \texttt{dat} represents a time point (sampled 25 times per second), and columns include
% Table describing the columns of the raw optical tracking data (dat).
\begin{table}[!h]
% \centering instead of a center environment: center adds extra vertical
% space inside a float, \centering does not.
\centering
% No vertical rule in the column spec: booktabs rules (\toprule, \midrule,
% \bottomrule) are designed to be used without vertical lines.
\begin{tabular}{rll}
\toprule
Column & Value & Notes \\
\midrule
\texttt{time} & Real time (ms) & \\
\texttt{game} & Game ID & \\
\texttt{quarter} & Quarter & \\
\texttt{shot\_clock} & Time remaining on shot clock & \texttt{NA} for this game \\
\texttt{game\_clock} & Time remaining in quarter (s) & \\
\texttt{x, y, z} & Ball position (ft) & Court region is $[0, 94] \times [0, 50]$ \\
\texttt{a1\_ent} & ID number of player 1 on away team (\texttt{a1}) & \\
\texttt{a1\_x, a1\_y} & Position of \texttt{a1} & \\
% ~ keeps "Table" and its number on the same line.
\texttt{a1\_event} & Event code for player \texttt{a1} &  See Table~\ref{tab:event_codes} for reference \\
\texttt{a\#\_*, h\#\_*} & As for \texttt{a1} & \\
\bottomrule
\end{tabular}
\caption{Description of variables in optical tracking data sample.}
\label{tab:data_desc}
\end{table}

Let's plot the data for some arbitrary moment in the game in Figure \ref{fig:plot_data}.

\begin{knitrout}\footnotesize
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{source}\hlstd{(}\hlkwd{sprintf}\hlstd{(}\hlstr{"%s/constants.R"}\hlstd{, code.dir))} \hlcom{# loads libraries and constants used throughout code}
\end{alltt}
\begin{verbatim}
## Warning in install.packages :
##   unable to access index for repository http://www.math.ntnu.no/inla/R/stable/src/contrib:
##   cannot open URL 'http://www.math.ntnu.no/inla/R/stable/src/contrib/PACKAGES'
## Warning in install.packages :
##   package 'INLA' is not available for this version of R
## 
## A version of this package for your version of R might be available elsewhere,
## see the ideas at
## https://cran.r-project.org/doc/manuals/r-patched/R-admin.html#Installing-packages
## Warning in install.packages :
##   unable to access index for repository http://www.math.ntnu.no/inla/R/stable/bin/macosx/contrib/4.0:
##   cannot open URL 'http://www.math.ntnu.no/inla/R/stable/bin/macosx/contrib/4.0/PACKAGES'
## 
## The downloaded binary packages are in
## 	/var/folders/zl/8xhj8rts3yxc506fzd8yw9th0000gp/T//Rtmpn82Yzg/downloaded_packages
\end{verbatim}


{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in library(INLA): there is no package called 'INLA'}}\begin{alltt}
\hlkwd{source}\hlstd{(}\hlkwd{sprintf}\hlstd{(}\hlstr{"%s/graphics.R"}\hlstd{, code.dir))}  \hlcom{# graphics/plotting functions}
\hlkwd{par}\hlstd{(}\hlkwc{mar}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{0}\hlstd{,} \hlnum{0}\hlstd{,} \hlnum{0}\hlstd{,} \hlnum{0}\hlstd{))}
\hlkwd{data.plotter}\hlstd{(dat,} \hlnum{1800}\hlstd{)}
\end{alltt}
\end{kframe}\begin{figure}[H]

{\centering \includegraphics[width=\maxwidth]{figure/plot_data-1} 

}

\caption[Plotting a single moment of optical tracking data]{Plotting a single moment of optical tracking data.}\label{fig:plot_data}
\end{figure}


\end{knitrout}

\subsection{Transformed data}

In this format, the data lacks information necessary for computing EPV. Most importantly, the identity of the ballcarrier is not labeled, and must be inferred from the record of game actions (and positional data). We also need to record the covariates used by our multiresolution transition models, and perform some simple data manipulations, such as rotating all data to the offensive half-court and removing moments where the gameplay is suspended. The following code performs these data tasks:


\begin{knitrout}\footnotesize
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{source}\hlstd{(}\hlkwd{sprintf}\hlstd{(}\hlstr{"%s/data_formatting.R"}\hlstd{, code.dir))}
\hlkwd{source}\hlstd{(}\hlkwd{sprintf}\hlstd{(}\hlstr{"%s/covariates.R"}\hlstd{, code.dir))}

\hlstd{poss} \hlkwb{<-} \hlkwd{possession.indicator}\hlstd{(dat)} \hlcom{# infer ballcarrier... takes about a minute}
\hlstd{tdat} \hlkwb{<-} \hlkwd{rearrange.data}\hlstd{(dat, poss)} \hlcom{# re-shuffle columns by to ballcarrier... (2 min)}
\hlstd{tdat} \hlkwb{<-} \hlkwd{offensive.halfcourt}\hlstd{(tdat)} \hlcom{# transforming to offensive halfcourt}
\hlstd{tdat} \hlkwb{<-} \hlkwd{offensive.ballcarrier}\hlstd{(tdat)}
\hlstd{touchID} \hlkwb{<-} \hlkwd{get.touchID}\hlstd{(tdat)}
\hlstd{covariates} \hlkwb{<-} \hlkwd{getAllCovars}\hlstd{(tdat)} \hlcom{# get covariates... (3 min)}
\hlstd{tdat} \hlkwb{<-} \hlkwd{data.frame}\hlstd{(tdat,} \hlkwc{touchID}\hlstd{=touchID, covariates)}
\hlkwd{save}\hlstd{(tdat,} \hlkwc{file}\hlstd{=}\hlkwd{sprintf}\hlstd{(}\hlstr{"%s/tdat.Rdata"}\hlstd{, data.dir))}
\end{alltt}
\end{kframe}
\end{knitrout}

Or, since this takes a few minutes to complete, it may be easier to load a pre-computed version of the transformed data set, \texttt{tdat}:

\begin{knitrout}\footnotesize
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{load}\hlstd{(}\hlkwd{sprintf}\hlstd{(}\hlstr{"%s/tdat.Rdata"}\hlstd{, data.dir))}
\end{alltt}
\end{kframe}
\end{knitrout}

\section{Components of hierarchical models}

\subsection{Player similarity adjacency matrix, $\mathbf{H}$}

The hierarchical models used to estimate parameters for the multiresolution transition models rely on preprocessed data summaries. First, the conditional autoregressive model priors used for many model parameters rely on a graph $\mathbf{H}$ of player similarity, represented using an adjacency matrix. As discussed in the paper, this graph is constructed based on the similarity in players' court occupancy distributions. We can visualize these court occupancy distributions, as well as the similarity scores we calculate between them.

\begin{knitrout}\footnotesize
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{load}\hlstd{(}\hlkwd{sprintf}\hlstd{(}\hlstr{"%s/playerbases.Rdata"}\hlstd{, data.dir))}
\hlstd{players} \hlkwb{<-} \hlkwd{read.csv}\hlstd{(}\hlkwd{sprintf}\hlstd{(}\hlstr{"%s/players2013.csv"}\hlstd{, data.dir))}
\hlkwd{head}\hlstd{(players)}
\end{alltt}
\begin{verbatim}
##   player_id firstname lastname       position height weight byear rookie position.number
## 1      3306     Elton    Brand Forward-Center     81    254  1979   1999               8
## 2     58293      Kyle   Korver  Guard-Forward     79    212  1981   2003               4
## 3    292401       Lou Williams          Guard     73    175  1986   2005               2
## 4    237675      Paul  Millsap Forward-Center     80    258  1985   2006               8
## 5    280587        Al  Horford Forward-Center     82    250  1986   2007               8
## 6    398043      Jeff   Teague    Point-Guard     74    181  1988   2009               1
\end{verbatim}
\end{kframe}
\end{knitrout}

\texttt{players} is a directory of the 461 NBA players in the 2013--14 season, and \texttt{playerbases.Rdata} contains summaries of their court occupancy patterns. \texttt{df} is the matrix $\mathbf{G}$ from the paper; plotting its rows reveals stark differences in players' spatial occupancy patterns:

\begin{knitrout}\footnotesize
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{par}\hlstd{(}\hlkwc{mfrow}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{1}\hlstd{,}\hlnum{5}\hlstd{))}
\hlkwa{for}\hlstd{(i} \hlkwa{in} \hlnum{1}\hlopt{:}\hlnum{5}\hlstd{)}
  \hlkwd{spatialPlot0}\hlstd{(df[i, ],} \hlkwc{legend}\hlstd{=F)}
\end{alltt}
\end{kframe}\begin{figure}[H]

{\centering \includegraphics[width=\maxwidth]{figure/plot_occupancy-1} 

}

\caption[Court occupancy distributions]{Court occupancy distributions.}\label{fig:plot_occupancy}
\end{figure}


\end{knitrout}

In the paper, we use non-negative matrix factorization to obtain a rank 5 approximation of the court occupancy distribution matrix. The basis surfaces of this approximation, given in Figure 8 of the paper, are reproduced here:

\begin{knitrout}\footnotesize
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{par}\hlstd{(}\hlkwc{mfrow}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{1}\hlstd{,}\hlnum{5}\hlstd{))}
\hlkwa{for}\hlstd{(i} \hlkwa{in} \hlnum{1}\hlopt{:}\hlnum{5}\hlstd{)}
  \hlkwd{spatialPlot0}\hlstd{(nmf.basis[i, ],} \hlkwc{legend}\hlstd{=F)}
\end{alltt}
\end{kframe}\begin{figure}[H]

{\centering \includegraphics[width=\maxwidth]{figure/plot_occupancy_bases-1} 

}

\caption[Court occupancy distribution bases]{Court occupancy distribution bases.}\label{fig:plot_occupancy_bases}
\end{figure}


\end{knitrout}

Projected onto this basis, the court occupancy distributions shown in Figure \ref{fig:plot_occupancy} look like:

\begin{knitrout}\footnotesize
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{df.lowrank} \hlkwb{<-} \hlstd{nmf.coef} \hlopt{%*%} \hlstd{nmf.basis}
\hlkwd{par}\hlstd{(}\hlkwc{mfrow}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{1}\hlstd{,}\hlnum{5}\hlstd{))}
\hlkwa{for}\hlstd{(i} \hlkwa{in} \hlnum{1}\hlopt{:}\hlnum{5}\hlstd{)}
  \hlkwd{spatialPlot0}\hlstd{(df.lowrank[i, ],} \hlkwc{legend}\hlstd{=F)}
\end{alltt}
\end{kframe}\begin{figure}[H]

{\centering \includegraphics[width=\maxwidth]{figure/appx_occupancy-1} 

}

\caption{Low rank court occupancy distributions for players shown in Figure \ref{fig:plot_occupancy}.}\label{fig:appx_occupancy}
\end{figure}


\end{knitrout}

It's better to compute player similarity using distance in the space of basis loadings, rather than the original court occupancy distributions, as such distances are calculated across axes that best describe player variation. We calculate \texttt{K}, a distance matrix comparing the loadings for the court occupancy distributions of all 461 players, then map this to a symmetric adjacency matrix \texttt{H} based on finding each player's closest eight neighbors:

\begin{knitrout}\footnotesize
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{K} \hlkwb{<-} \hlkwd{matrix}\hlstd{(}\hlnum{NA}\hlstd{,} \hlkwc{nrow}\hlstd{=}\hlkwd{nrow}\hlstd{(df),} \hlkwc{ncol}\hlstd{=}\hlkwd{nrow}\hlstd{(df))}
\hlkwa{for}\hlstd{(i} \hlkwa{in} \hlnum{1}\hlopt{:}\hlkwd{nrow}\hlstd{(K))\{}
  \hlstd{this.coef} \hlkwb{<-} \hlstd{nmf.coef[i, ]} \hlopt{/} \hlkwd{sum}\hlstd{(nmf.coef[i, ])}
  \hlstd{K[i, ]} \hlkwb{<-} \hlkwd{apply}\hlstd{(nmf.coef,} \hlnum{1}\hlstd{,} \hlkwa{function}\hlstd{(}\hlkwc{r}\hlstd{)} \hlkwd{sum}\hlstd{((r} \hlopt{/} \hlkwd{sum}\hlstd{(r)} \hlopt{-} \hlstd{this.coef)}\hlopt{^}\hlnum{2}\hlstd{))}
\hlstd{\}}
\hlstd{H} \hlkwb{<-} \hlnum{0} \hlopt{*} \hlstd{K}
\hlkwa{for}\hlstd{(i} \hlkwa{in} \hlnum{1}\hlopt{:}\hlkwd{nrow}\hlstd{(H))\{}
  \hlstd{inds} \hlkwb{<-} \hlkwd{order}\hlstd{(K[i, ])[}\hlnum{1}\hlopt{:}\hlnum{8} \hlopt{+} \hlnum{1}\hlstd{]}
  \hlstd{H[i,inds]} \hlkwb{<-} \hlstd{H[inds, i]} \hlkwb{<-} \hlnum{1}
\hlstd{\}}
\end{alltt}
\end{kframe}
\end{knitrout}

To check any player's ``neighbors'' according to \texttt{H}, we can do (for Al Horford):

\begin{knitrout}\footnotesize
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{this.player} \hlkwb{<-} \hlkwd{grep}\hlstd{(}\hlstr{"Horford"}\hlstd{, players}\hlopt{$}\hlstd{lastname)}
\hlkwd{paste}\hlstd{(players}\hlopt{$}\hlstd{firstname, players}\hlopt{$}\hlstd{lastname)[}\hlkwd{which}\hlstd{(H[this.player, ]} \hlopt{==} \hlnum{1}\hlstd{)]}
\end{alltt}
\begin{verbatim}
##  [1] "Brandon Bass"      "J.J. Hickson"      "Andre Drummond"    "Tony Mitchell"    
##  [5] "David Lee"         "Dwight Howard"     "Blake Griffin"     "Zach Randolph"    
##  [9] "Anthony Davis"     "Amar'e Stoudemire" "Jason Maxiell"     "Glen Davis"       
## [13] "DeMarcus Cousins"  "Jonas Valanciunas" "Enes Kanter"
\end{verbatim}
\end{kframe}
\end{knitrout}

\subsection{Spatial effect basis functions}

Similarly, let's load the basis functions that are used in representing the spatial effects in players' macrotransition entry models: we denote these basis functions $\phi_{ji}$, where $i=1, \ldots, 10$, and $j$ indexes shot-taking, four different pass options, and turnovers (recall that for the spatial effects in the shot probability model (Equation 10 in the paper), we use the same basis functions as we do for the shot-taking hazard model). To recreate Figure 6 of the paper, which plots the shot-taking bases, we'd do:

\begin{knitrout}\footnotesize
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{par}\hlstd{(}\hlkwc{mfrow} \hlstd{=} \hlkwd{c}\hlstd{(}\hlnum{2}\hlstd{,}\hlnum{5}\hlstd{))}
\hlkwa{for}\hlstd{(i} \hlkwa{in} \hlnum{1}\hlopt{:}\hlnum{10}\hlstd{)}
  \hlkwd{spatialPlot1}\hlstd{(take.basis[i, ],} \hlkwc{legend}\hlstd{=F)}
\end{alltt}


{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in which(mesh\$loc[, 1] >= 0 \& mesh\$loc[, 1] <= 47 \& mesh\$loc[, 2] >= : object 'mesh' not found}}\end{kframe}
\end{knitrout}

\section{Loading parameters and model estimates}

\subsection{Microtransition model}

Here, we will load and illustrate the results of the multiresolution transition models discussed in Section 3 of the paper. First, let's load the (offensive) microtransition model output for LeBron James, print the parameter estimates, and plot the acceleration effects $\mu^{\ell}_x, \mu^{\ell}_y$, as in Figure 4 of the paper.

\begin{footnotesize}
\begin{kframe}
\begin{alltt}
\hlstd{player.id} \hlkwb{<-} \hlstd{players}\hlopt{$}\hlstd{player_id[}\hlkwd{which}\hlstd{(players}\hlopt{$}\hlstd{firstname} \hlopt{==} \hlstr{"LeBron"}\hlstd{)]}
\hlkwd{load}\hlstd{(}\hlkwd{sprintf}\hlstd{(}\hlstr{"%s/micros/%s.Rdata"}\hlstd{, data.dir, player.id))}
\hlcom{# x component of LeBron James' micro model during ball possession}
\hlkwd{xtable}\hlstd{(with.ball}\hlopt{$}\hlstd{io.x}\hlopt{$}\hlstd{summary.fixed[,} \hlnum{1}\hlopt{:}\hlnum{5}\hlstd{])}
\end{alltt}
\end{kframe}% latex table generated in R 4.0.3 by xtable 1.8-4 package
% Sun Jan 31 15:13:35 2021
\begin{table}[ht]
\centering
\begin{tabular}{rrrrrr}
  \hline
 & mean & sd & 0.025quant & 0.5quant & 0.975quant \\ 
  \hline
dif & 0.98 & 0.00 & 0.98 & 0.98 & 0.98 \\ 
  intercept & 0.00 & 0.01 & -0.03 & 0.00 & 0.03 \\ 
   \hline
\end{tabular}
\end{table}

\end{footnotesize}

\begin{knitrout}\footnotesize
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{par}\hlstd{(}\hlkwc{mfrow}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{1}\hlstd{,}\hlnum{2}\hlstd{),} \hlkwc{mar}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{0}\hlstd{,}\hlnum{0}\hlstd{,}\hlnum{0}\hlstd{,}\hlnum{0}\hlstd{))}
\hlkwd{vectorPlot}\hlstd{(with.ball)}
\end{alltt}


{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in inla.mesh.projector(mesh, dims = c(40, 40)): could not find function "{}inla.mesh.projector"{}}}\begin{alltt}
\hlkwd{vectorPlot}\hlstd{(without.ball)}
\end{alltt}


{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in inla.mesh.projector(mesh, dims = c(40, 40)): could not find function "{}inla.mesh.projector"{}}}\end{kframe}
\end{knitrout}

The defensive microtransition model is less complicated, and we can fit it very quickly. The code below estimates the same model parameters for all players on defense:

\begin{footnotesize}
\begin{kframe}
\begin{alltt}
\hlkwd{source}\hlstd{(}\hlkwd{sprintf}\hlstd{(}\hlstr{"%s/parameters.R"}\hlstd{, code.dir))} \hlcom{# loads many modeling functions}
\hlstd{def.micro} \hlkwb{<-} \hlkwd{microDefModel}\hlstd{(tdat)}
\hlcom{# coefficients are a_x, c_x, and b_x from Equation 6 in paper}
\hlkwd{xtable}\hlstd{(}\hlkwd{summary}\hlstd{(def.micro}\hlopt{$}\hlstd{mod.x)}\hlopt{$}\hlstd{coef[,} \hlnum{1}\hlopt{:}\hlnum{3}\hlstd{])}
\end{alltt}
\end{kframe}% latex table generated in R 4.0.3 by xtable 1.8-4 package
% Sun Jan 31 15:13:39 2021
\begin{table}[ht]
\centering
\begin{tabular}{rrrr}
  \hline
 & Estimate & Std. Error & t value \\ 
  \hline
(Intercept) & -0.00 & 0.00 & -29.74 \\ 
  def.eps.x[-length(def.eps.x)] & 0.96 & 0.00 & 1315.54 \\ 
  residual.x[-length(residual.x)] & -0.00 & 0.00 & -27.12 \\ 
  opt.eps.x[-length(opt.eps.x)] & 0.00 & 0.00 & 3.99 \\ 
   \hline
\end{tabular}
\end{table}

\end{footnotesize}

\subsection{Macrotransition entry models}

We have six macrotransition entry models (from Section 3.2 of the paper). Each is fit hierarchically for all players in the NBA using the R-INLA software, as discussed in Section 4 of the paper. Let's load the results of the shot-taking macrotransition entry model, and interpret some of the results. 

\begin{footnotesize}
\begin{kframe}
\begin{alltt}
\hlkwd{load}\hlstd{(}\hlkwd{sprintf}\hlstd{(}\hlstr{"%s/INLA_TAKE.Rdata"}\hlstd{, data.dir))}
\hlcom{# coefficients for time-varying covariates in shot-taking hazard model}
\hlkwd{xtable}\hlstd{(inla.out}\hlopt{$}\hlstd{summary.fixed[,} \hlnum{1}\hlopt{:}\hlnum{2}\hlstd{])}
\end{alltt}
\end{kframe}% latex table generated in R 4.0.3 by xtable 1.8-4 package
% Sun Jan 31 15:13:47 2021
\begin{table}[ht]
\centering
\begin{tabular}{rrr}
  \hline
 & mean & sd \\ 
  \hline
(Intercept) & -3.30 & 0.63 \\ 
  dribble & -0.32 & 0.01 \\ 
  ndef & -0.08 & 0.01 \\ 
  ball.lastsec & 0.06 & 0.00 \\ 
  b1 & 1.79 & 0.63 \\ 
  b2 & -1.62 & 0.63 \\ 
  b3 & -0.52 & 0.64 \\ 
  b4 & 0.82 & 0.63 \\ 
  b5 & -6.80 & 0.64 \\ 
  b6 & -1.60 & 0.64 \\ 
  b7 & -3.25 & 0.63 \\ 
  b8 & -2.89 & 0.64 \\ 
  b9 & -3.62 & 0.63 \\ 
  b10 & -0.80 & 0.64 \\ 
   \hline
\end{tabular}
\end{table}

\end{footnotesize}

\texttt{b1} is the coefficient for the loading on the first basis function (Figure~\ref{fig:shot_bases}). These are fixed effects, so that player-specific coefficient values are represented as random effects. Parameter inference for the random effects is presented somewhat confusingly in the output from R-INLA. Inference for random effects on the situational covariates is stored in matrices where rows represent different players. For instance, for Chris Bosh, we get the mean, SD, and quantiles of his player-specific \texttt{dribble} parameter\footnote{See Appendix A.1 of the paper for explanations of the meaning of the covariates used.} by running:

\begin{footnotesize}
\begin{kframe}
\begin{alltt}
\hlstd{this.player} \hlkwb{<-} \hlkwd{grep}\hlstd{(}\hlstr{"Bosh"}\hlstd{, players}\hlopt{$}\hlstd{lastname)}
\hlkwd{xtable}\hlstd{(inla.out}\hlopt{$}\hlstd{summary.random}\hlopt{$}\hlstd{p.dribble[this.player,} \hlnum{2}\hlopt{:}\hlnum{6}\hlstd{])}
\end{alltt}
\end{kframe}% latex table generated in R 4.0.3 by xtable 1.8-4 package
% Sun Jan 31 15:13:47 2021
\begin{table}[ht]
\centering
\begin{tabular}{rrrrrr}
  \hline
 & mean & sd & 0.025quant & 0.5quant & 0.975quant \\ 
  \hline
237 & 0.31 & 0.09 & 0.14 & 0.31 & 0.49 \\ 
   \hline
\end{tabular}
\end{table}

\end{footnotesize}

However, the random effects on the spatial basis coefficients are stacked in a matrix with $(1 + 10) \times 461$ rows (there are 461 players in our full NBA data), consisting of 11 submatrices of 461 rows each, which give the random effects on the intercept and on each of the 10 basis function coefficients, in order. This matrix is copied across all $11$ corresponding output fields in the \texttt{inla.out\$summary.random} object:

\begin{footnotesize}
\begin{kframe}
\begin{alltt}
\hlstd{n.player} \hlkwb{<-} \hlkwd{nrow}\hlstd{(players)}
\hlcom{# inference for Chris Bosh's intercept and first basis coefficient}
\hlkwd{xtable}\hlstd{(inla.out}\hlopt{$}\hlstd{summary.random}\hlopt{$}\hlstd{p.int[this.player} \hlopt{+} \hlnum{0}\hlopt{:}\hlnum{1}\hlstd{,} \hlnum{2}\hlopt{:}\hlnum{6}\hlstd{])}
\end{alltt}
\end{kframe}% latex table generated in R 4.0.3 by xtable 1.8-4 package
% Sun Jan 31 15:13:47 2021
\begin{table}[ht]
\centering
\begin{tabular}{rrrrrr}
  \hline
 & mean & sd & 0.025quant & 0.5quant & 0.975quant \\ 
  \hline
237 & -0.02 & 0.47 & -0.95 & -0.02 & 0.90 \\ 
  238 & -0.56 & 0.49 & -1.52 & -0.56 & 0.40 \\ 
   \hline
\end{tabular}
\end{table}
\begin{kframe}\begin{alltt}
\hlkwd{xtable}\hlstd{(inla.out}\hlopt{$}\hlstd{summary.random}\hlopt{$}\hlstd{p.b1[this.player} \hlopt{+} \hlnum{0}\hlopt{:}\hlnum{1}\hlstd{,} \hlnum{2}\hlopt{:}\hlnum{6}\hlstd{])} \hlcom{# identical}
\end{alltt}
\end{kframe}% latex table generated in R 4.0.3 by xtable 1.8-4 package
% Sun Jan 31 15:13:47 2021
\begin{table}[ht]
\centering
\begin{tabular}{rrrrrr}
  \hline
 & mean & sd & 0.025quant & 0.5quant & 0.975quant \\ 
  \hline
237 & -0.02 & 0.47 & -0.95 & -0.02 & 0.90 \\ 
  238 & -0.56 & 0.49 & -1.52 & -0.56 & 0.40 \\ 
   \hline
\end{tabular}
\end{table}

\end{footnotesize}

The following code rearranges the output into a single matrix, with each row giving the player-specific parameters' posterior mean (fixed $+$ random effects) for all model components (situational covariates and spatial effects).

\begin{knitrout}\footnotesize
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{param.names} \hlkwb{<-} \hlkwd{row.names}\hlstd{(inla.out}\hlopt{$}\hlstd{summary.fixed)}
\hlstd{n} \hlkwb{<-} \hlkwd{nrow}\hlstd{(players)}
\hlstd{player.params} \hlkwb{<-} \hlkwd{matrix}\hlstd{(}\hlnum{NA}\hlstd{,} \hlkwc{nrow}\hlstd{=n,} \hlkwc{ncol}\hlstd{=}\hlkwd{length}\hlstd{(param.names))}
\hlstd{y.fix} \hlkwb{<-} \hlstd{inla.out}\hlopt{$}\hlstd{summary.fixed[,} \hlstr{"mean"}\hlstd{]} \hlcom{# fixed effects}
\hlstd{temp} \hlkwb{<-} \hlkwd{names}\hlstd{(inla.out}\hlopt{$}\hlstd{summary.random)}
\hlstd{basis.inds} \hlkwb{<-} \hlkwd{c}\hlstd{(}\hlkwd{which}\hlstd{(temp} \hlopt{==} \hlstr{"p.int"}\hlstd{),} \hlkwd{grep}\hlstd{(}\hlstr{"p.b[0-9][0-9]*"}\hlstd{, temp))}
\hlstd{cov.inds} \hlkwb{<-} \hlkwd{setdiff}\hlstd{(}\hlkwd{seq}\hlstd{(}\hlkwd{length}\hlstd{(inla.out}\hlopt{$}\hlstd{summary.random)), basis.inds)}
\hlkwa{for}\hlstd{(pl} \hlkwa{in} \hlnum{1}\hlopt{:}\hlstd{n) \{}
  \hlcom{# add players' random effects to fixed effects}
  \hlstd{y.rand} \hlkwb{<-} \hlkwd{c}\hlstd{(inla.out}\hlopt{$}\hlstd{summary.random}\hlopt{$}\hlstd{p.int[pl,} \hlstr{"mean"}\hlstd{],}
    \hlkwd{sapply}\hlstd{(cov.inds,}
      \hlkwa{function}\hlstd{(}\hlkwc{k}\hlstd{) inla.out}\hlopt{$}\hlstd{summary.random[[k]][pl,} \hlstr{"mean"}\hlstd{]),}
    \hlstd{inla.out}\hlopt{$}\hlstd{summary.random}\hlopt{$}\hlstd{p.b1[pl} \hlopt{+} \hlstd{n} \hlopt{*} \hlstd{(}\hlnum{1}\hlopt{:}\hlstd{n.basis),} \hlstr{"mean"}\hlstd{])}
  \hlstd{player.params[pl, ]} \hlkwb{<-} \hlstd{y.fix} \hlopt{+} \hlstd{y.rand}
\hlstd{\}}
\end{alltt}


{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in `[.data.frame`(inla.out\$summary.random\$p.b1, pl + n * (1:n.basis), : object 'n.basis' not found}}\end{kframe}
\end{knitrout}

For Chris Bosh, for instance, we can view his parameter estimates and see where each ranks relative to the rest of the league:

\begin{footnotesize}
\begin{kframe}
\begin{alltt}
\hlstd{values} \hlkwb{<-} \hlstd{player.params[this.player, ]}
\hlstd{ranks} \hlkwb{<-} \hlkwd{apply}\hlstd{(player.params,} \hlnum{2}\hlstd{,} \hlkwa{function}\hlstd{(}\hlkwc{col}\hlstd{)} \hlkwd{rank}\hlstd{(col)[this.player])} \hlcom{# increasing order}
\hlkwd{xtable}\hlstd{(}\hlkwd{data.frame}\hlstd{(param.names, values, ranks),} \hlkwc{digits}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{0}\hlstd{,}\hlnum{0}\hlstd{,}\hlnum{2}\hlstd{,}\hlnum{0}\hlstd{))}
\end{alltt}
\end{kframe}% latex table generated in R 4.0.3 by xtable 1.8-4 package
% Sun Jan 31 15:13:47 2021
\begin{table}[ht]
\centering
\begin{tabular}{rllr}
  \hline
 & param.names & values & ranks \\ 
  \hline
1 & (Intercept) &  & 237 \\ 
  2 & dribble &  & 237 \\ 
  3 & ndef &  & 237 \\ 
  4 & ball.lastsec &  & 237 \\ 
  5 & b1 &  & 237 \\ 
  6 & b2 &  & 237 \\ 
  7 & b3 &  & 237 \\ 
  8 & b4 &  & 237 \\ 
  9 & b5 &  & 237 \\ 
  10 & b6 &  & 237 \\ 
  11 & b7 &  & 237 \\ 
  12 & b8 &  & 237 \\ 
  13 & b9 &  & 237 \\ 
  14 & b10 &  & 237 \\ 
   \hline
\end{tabular}
\end{table}

\end{footnotesize}

The most notable values here are a small \texttt{b1} coefficient relative to the rest of the league, and a large \texttt{b5}. Referring to Figure~\ref{fig:shot_bases}, we see that this means his shot-taking hazard is relatively low in the right-handed layup area, and relatively high in three-point range. This suggests that, adjusting for his baseline shooting rate (\texttt{intercept}) and other situational covariates, Bosh attempts threes at a high rate (per time controlling the ball from three-point range), and right-handed layups/dunks at a low rate. This behavior is generally shared among other stretch-4 type players who are catch-and-shoot three-point shooters, and whose touches near the basket come more from slow-developing plays or those that don't lead to shots---like ``isolations'' or offensive rebounds---than from layups or attacking (also, note that Bosh is left-handed). For instance, players such as Kevin Love and Dirk Nowitzki exhibit similar behavior.

Analogous to Figure 5 in the paper, we can plot players' spatial effect surfaces. It is also helpful to plot only the random effects, to see where players' spatial tendencies differ from typical league behavior. For Chris Bosh's shot-taking hazard, we get these side-by-side with:

\begin{knitrout}\footnotesize
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{vars} \hlkwb{<-} \hlkwd{paste0}\hlstd{(}\hlstr{"b"}\hlstd{,} \hlkwd{seq}\hlstd{(n.basis))}
\end{alltt}


{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in seq(n.basis): object 'n.basis' not found}}\begin{alltt}
\hlstd{spat.fixed} \hlkwb{<-} \hlkwd{as.numeric}\hlstd{(inla.out}\hlopt{$}\hlstd{summary.fixed[}\hlstr{"(Intercept)"}\hlstd{,} \hlstr{"mean"}\hlstd{]} \hlopt{+}
                           \hlkwd{t}\hlstd{(take.basis)} \hlopt{%*%} \hlstd{inla.out}\hlopt{$}\hlstd{summary.fixed[vars,} \hlstr{"mean"}\hlstd{])}
\end{alltt}


{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in t(take.basis): object 'take.basis' not found}}\begin{alltt}
\hlstd{spat.random} \hlkwb{<-} \hlkwd{as.numeric}\hlstd{(inla.out}\hlopt{$}\hlstd{summary.random}\hlopt{$}\hlstd{p.int[this.player,} \hlstr{"mean"}\hlstd{]} \hlopt{+}
                            \hlkwd{t}\hlstd{(take.basis)} \hlopt{%*%} \hlstd{inla.out}\hlopt{$}\hlstd{summary.random}\hlopt{$}\hlstd{p.int[this.player} \hlopt{+} \hlstd{n} \hlopt{*} \hlstd{(}\hlnum{1}\hlopt{:}\hlstd{n.basis),} \hlstr{"mean"}\hlstd{])}
\end{alltt}


{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in t(take.basis): object 'take.basis' not found}}\begin{alltt}
\hlkwd{par}\hlstd{(}\hlkwc{mfrow}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{1}\hlstd{,}\hlnum{2}\hlstd{),} \hlkwc{mar}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{1}\hlstd{,}\hlnum{4}\hlstd{,}\hlnum{1}\hlstd{,}\hlnum{6}\hlstd{))}
\hlkwd{spatialPlot1}\hlstd{(spat.fixed} \hlopt{+} \hlstd{spat.random,} \hlkwc{axis.args}\hlstd{=}\hlkwd{list}\hlstd{(}\hlkwc{cex.axis}\hlstd{=}\hlnum{0.75}\hlstd{))}
\end{alltt}


{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in which(mesh\$loc[, 1] >= 0 \& mesh\$loc[, 1] <= 47 \& mesh\$loc[, 2] >= : object 'mesh' not found}}\begin{alltt}
\hlkwd{spatialPlot1}\hlstd{(spat.random,} \hlkwc{axis.args}\hlstd{=}\hlkwd{list}\hlstd{(}\hlkwc{cex.axis}\hlstd{=}\hlnum{0.75}\hlstd{))}
\end{alltt}


{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in which(mesh\$loc[, 1] >= 0 \& mesh\$loc[, 1] <= 47 \& mesh\$loc[, 2] >= : object 'mesh' not found}}\end{kframe}
\end{knitrout}

To view the spatial effect on a passing hazard (for instance, to player 1---the point guard), we would do:

\begin{knitrout}\footnotesize
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{load}\hlstd{(}\hlkwd{sprintf}\hlstd{(}\hlstr{"%s/INLA_PASS1.Rdata"}\hlstd{, data.dir))}
\hlstd{vars} \hlkwb{<-} \hlkwd{paste0}\hlstd{(}\hlstr{"b"}\hlstd{,} \hlkwd{seq}\hlstd{(n.basis))}
\end{alltt}


{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in seq(n.basis): object 'n.basis' not found}}\begin{alltt}
\hlstd{spat.fixed} \hlkwb{<-} \hlkwd{as.numeric}\hlstd{(inla.out}\hlopt{$}\hlstd{summary.fixed[}\hlstr{"(Intercept)"}\hlstd{,} \hlstr{"mean"}\hlstd{]} \hlopt{+}
                           \hlkwd{t}\hlstd{(pass1.basis)} \hlopt{%*%} \hlstd{inla.out}\hlopt{$}\hlstd{summary.fixed[vars,} \hlstr{"mean"}\hlstd{])}
\end{alltt}


{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in t(pass1.basis): object 'pass1.basis' not found}}\begin{alltt}
\hlstd{spat.random} \hlkwb{<-} \hlkwd{as.numeric}\hlstd{(inla.out}\hlopt{$}\hlstd{summary.random}\hlopt{$}\hlstd{p.int[this.player,} \hlstr{"mean"}\hlstd{]} \hlopt{+}
                            \hlkwd{t}\hlstd{(pass1.basis)} \hlopt{%*%} \hlstd{inla.out}\hlopt{$}\hlstd{summary.random}\hlopt{$}\hlstd{p.int[this.player} \hlopt{+} \hlstd{n} \hlopt{*} \hlstd{(}\hlnum{1}\hlopt{:}\hlstd{n.basis),} \hlstr{"mean"}\hlstd{])}
\end{alltt}


{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in t(pass1.basis): object 'pass1.basis' not found}}\begin{alltt}
\hlkwd{par}\hlstd{(}\hlkwc{mfrow}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{1}\hlstd{,}\hlnum{2}\hlstd{),} \hlkwc{mar}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{1}\hlstd{,}\hlnum{4}\hlstd{,}\hlnum{1}\hlstd{,}\hlnum{6}\hlstd{))}
\hlkwd{spatialPlot2}\hlstd{(}\hlkwd{head}\hlstd{(spat.fixed} \hlopt{+} \hlstd{spat.random, mesh}\hlopt{$}\hlstd{n),}
             \hlkwd{tail}\hlstd{(spat.fixed} \hlopt{+} \hlstd{spat.random, mesh}\hlopt{$}\hlstd{n),}
                  \hlkwc{axis.args}\hlstd{=}\hlkwd{list}\hlstd{(}\hlkwc{cex.axis}\hlstd{=}\hlnum{0.75}\hlstd{))}
\end{alltt}


{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in which(mesh\$loc[, 1] >= 0 \& mesh\$loc[, 1] <= 47 \& mesh\$loc[, 2] >= : object 'mesh' not found}}\end{kframe}
\end{knitrout}

Lastly, it's useful to check the hyperparameter estimates to make sure they are sensible. The hyperparameters for the macrotransition entry models (and shot probability model) are log precision terms for the CAR model, described in Sections 4.1 and 4.2 of the paper. In this implementation, we've fixed the hyperparameters for all spatial basis loadings to be the same within each macrotransition entry model.

\begin{knitrout}\footnotesize
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{inla.out}\hlopt{$}\hlstd{mode}\hlopt{$}\hlstd{theta}        \hlcom{# parameter values}
\end{alltt}
\begin{verbatim}
## [1] -2.36010142 -0.04486083  0.53744864  4.00845149 -0.12840774  0.46386675  0.88176478 -0.21073187
\end{verbatim}
\begin{alltt}
\hlstd{inla.out}\hlopt{$}\hlstd{mode}\hlopt{$}\hlstd{theta.tags}   \hlcom{# parameter names}
\end{alltt}
\begin{verbatim}
## [1] "Log precision for p.int"          "Log precision for p.dribble"     
## [3] "Log precision for p.ndef"         "Log precision for p.ball.lastsec"
## [5] "Log precision for p.doff1"        "Log precision for p.doff2"       
## [7] "Log precision for p.doff3"        "Log precision for p.ddef"
\end{verbatim}
\end{kframe}
\end{knitrout}


\subsection{Transition probability matrices}

The last model component needed to calculate EPV is the set of transition probability matrices for $C_t$, described in Section 3.4 of the paper. We load these---for instance, for Dwyane Wade---by running:

\begin{knitrout}\footnotesize
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{player.id} \hlkwb{<-} \hlstd{players}\hlopt{$}\hlstd{player_id[}\hlkwd{grep}\hlstd{(}\hlstr{"Wade"}\hlstd{, players}\hlopt{$}\hlstd{lastname)]}
\hlkwd{load}\hlstd{(}\hlkwd{sprintf}\hlstd{(}\hlstr{"%s/tmats/%s.Rdata"}\hlstd{, data.dir, player.id))}
\hlkwd{names}\hlstd{(tmat.ind)}
\end{alltt}
\begin{verbatim}
## [1] "micros"  "passes1" "passes2" "passes3" "passes4" "absorbs"
\end{verbatim}
\end{kframe}
\end{knitrout}

\texttt{tmat.ind} is a list with each element representing blocks (sub-matrices) of $\tilde{\mathbf{N}}$, the transition count matrix for $C_t$ given the players on the court (see Section 3.4 of the paper). The rows in each block represent the 14 \{\texttt{region}\} $\times$ \{defended\} states we use in $C_t$ for a given ballcarrier, as explained in Section 2.2 of the paper. Columns in these blocks also represent such states, except for the \texttt{absorbs} block, where columns represent absorbing states in $\mathcal{C}_{\text{end}}$. Depending on the lineup used, different blocks will be used to construct $\mathbf{P}$. Also note, the \texttt{tmat.pos} object contains blocks used in calculating EPV-Added, as discussed in Section A.4 of the paper.

\section{Calculating EPV}

\subsection{Coarsened state expected point values}

Given estimates of our parameters, EPV is calculated using Monte Carlo. The general idea, introduced in Section 3 of the paper, is to alternate draws from the micro- and macrotransition entry models until a macrotransition (pass, shot attempt, turnover) occurs. Then, given the predicted outcome of this macrotransition, we calculate EPV using the transition probability matrix of coarsened states. Before actually simulating EPV draws, it's useful to look at what the expected point values are of each coarsened state, as EPV will always be a weighted average of these values:

\begin{knitrout}\footnotesize
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{source}\hlstd{(}\hlkwd{sprintf}\hlstd{(}\hlstr{"%s/parameters.R"}\hlstd{, code.dir))}
\hlstd{hyper} \hlkwb{<-} \hlkwd{getHyperParams}\hlstd{(tdat)} \hlcom{# makes sure all parameter inference is loaded}
\end{alltt}


{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in getHyperParams(tdat): object 'inla.names' not found}}\begin{alltt}
\hlstd{ev.out} \hlkwb{<-} \hlkwd{evLineups}\hlstd{(tdat)} \hlcom{# coarsened state EVs for each offensive lineup in tdat}
\end{alltt}


{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in evLineups(tdat): object 'state\_nms' not found}}\end{kframe}
\end{knitrout}

% change "teammates.all" to "lineups"!
In \texttt{ev.out}, \texttt{teammates.all} is a matrix of 5-man lineups that appear in \texttt{tdat} (there may be duplicate rows). For instance, we have the starting 5 for the Miami Heat:

\begin{knitrout}\footnotesize
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{lineup.ids} \hlkwb{<-} \hlstd{ev.out}\hlopt{$}\hlstd{teammates.all[}\hlnum{2}\hlstd{, ]}
\end{alltt}


{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in eval(expr, envir, enclos): object 'ev.out' not found}}\begin{alltt}
\hlstd{this.lineup} \hlkwb{<-} \hlstd{players[}\hlkwd{match}\hlstd{(lineup.ids, players}\hlopt{$}\hlstd{player_id), ]}
\end{alltt}


{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in match(lineup.ids, players\$player\_id): object 'lineup.ids' not found}}\begin{alltt}
\hlstd{this.lineup[,} \hlnum{2}\hlopt{:}\hlnum{4}\hlstd{]}
\end{alltt}


{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in eval(expr, envir, enclos): object 'this.lineup' not found}}\end{kframe}
\end{knitrout}

For each 5-man lineup, there are $5 \times 2 \text{ (defended or not) } \times 7 \text{ (court regions) } = 70$ coarsened state expected values. To check these for LeBron James' possession states, for instance,  we'd do:

\begin{footnotesize}
\begin{kframe}
\begin{alltt}
\hlstd{lineup.states} \hlkwb{<-} \hlkwd{paste}\hlstd{(}\hlkwd{rep}\hlstd{(this.lineup}\hlopt{$}\hlstd{lastname,} \hlkwc{each}\hlstd{=}\hlnum{14}\hlstd{), state_nms)} \hlcom{# state names}
\end{alltt}


{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in paste(rep(this.lineup\$lastname, each = 14), state\_nms): object 'this.lineup' not found}}\begin{alltt}
\hlkwd{xtable}\hlstd{(}\hlkwd{data.frame}\hlstd{(}\hlkwc{state}\hlstd{=lineup.states,} \hlkwc{EV}\hlstd{=ev.out}\hlopt{$}\hlstd{evs[[}\hlnum{2}\hlstd{]])[}\hlkwd{grep}\hlstd{(}\hlstr{"James"}\hlstd{, lineup.states), ],} \hlkwc{digits}\hlstd{=}\hlnum{2}\hlstd{)}
\end{alltt}


{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in data.frame(state = lineup.states, EV = ev.out\$evs[[2]]): object 'lineup.states' not found}}\end{kframe}
\end{footnotesize}

These results seem pretty sensible, as, for instance, EVs are uniformly higher for uncontested states, with the difference especially great within the restricted area (1.60 versus 1.42) and corner 3 (1.21 versus 1.09). Note that with different teammates, we would see slightly different EVs for these states.

\subsection{EPV curves}

As mentioned in the paper, given estimates of all parameter values, EPV is computed by Monte Carlo sampling from the multiresolution transition models. This is a computationally expensive procedure, dominated by computing spatial effects for every player-position update from the microtransition model and hazard calculation from the macrotransition entry/exit models. However, it is straightforward to sample multiple time points together.

To supply EPV curves for a full game, it's most efficient to draw a single EPV estimate for all time points in a game, and then parallelize this across multiple machines that don't need to share memory. The code executes an EPV draw at each time point for every offensive possession in our sample game:

\begin{knitrout}\footnotesize
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{source}\hlstd{(}\hlkwd{sprintf}\hlstd{(}\hlstr{"%s/EPV_calcs.R"}\hlstd{, code.dir))}
\hlstd{draw.raw} \hlkwb{<-} \hlkwd{multiresDraw}\hlstd{(tdat, hyper, def.micro, ev.out,} \hlkwc{nmic}\hlstd{=}\hlnum{50}\hlstd{,} \hlkwc{save.positions}\hlstd{=F)}
\hlstd{draw} \hlkwb{<-} \hlkwd{compressEPV}\hlstd{(tdat, draw.raw}\hlopt{$}\hlstd{fv.epv.list)}
\end{alltt}
\end{kframe}
\end{knitrout}

The \texttt{nmic} argument specifies 50 iterations (2 seconds) of the microtransition model, which is usually sufficient to observe $\tau_t$, a macrotransition entry. The \texttt{save.positions} argument stores the player-position innovations supplied by the micro model. These are necessary to reproduce Figure 7, which shows players' predicted motion paths, but necessitate lots of additional storage, as they essentially replicate the full positional data \texttt{nmic} times for each EPV draw.

We can load a pre-computed version of \texttt{draw}:

\begin{knitrout}\footnotesize
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{load}\hlstd{(}\hlkwd{sprintf}\hlstd{(}\hlstr{"%s/draw.Rdata"}\hlstd{, data.dir))}
\hlkwd{names}\hlstd{(draw)}
\end{alltt}
\begin{verbatim}
## [1] "epv"       "probs"     "vals"      "probs.now" "vals.now"
\end{verbatim}
\end{kframe}
\end{knitrout}

Here, \texttt{epv} is a vector of EPV values corresponding to each row of \texttt{tdat}. \texttt{probs} is a data frame where each row gives the probabilities associated with each possible macrotransition event at time $t$ ($\mathbb{P}(C_{\delta_t} | \mathcal{F}^{(Z)}_t)$), and \texttt{vals} gives the associated expected point values conditional on these macrotransitions: $\mathbb{E}[X | C_{\delta_t}]$ (these probabilities/values are illustrated in Figure 7 of the paper). \texttt{probs} and \texttt{vals} contain an ``\texttt{other}'' state which represents no macrotransition occurring within the \texttt{50} simulated microtransitions. In this case, to calculate the expected value, we use the coarsened state expected value associated with the final microtransition draw, in this case $\mathbb{E}[X | C_{t + 2}]$. \texttt{probs.now} and \texttt{vals.now} are the instantaneous macrotransition probabilities and associated expected values. 

For instance, during the first possession in this game, after Udonis Haslem brings the ball into the offensive halfcourt, we see the next action to most likely be a pass to Chalmers or Chris Bosh (the next play is a pass to Bosh). A shot attempt is extremely unlikely, and there is a 0.226 probability that Haslem will still possess the ball 2 seconds down the road. Among his passing options, James is the most valuable, though also the least likely to occur (James is near the basket, but the passing lane doesn't appear to be open).

\begin{knitrout}\footnotesize
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{transformed.data.plotter}\hlstd{(tdat,} \hlnum{30}\hlstd{)}
\end{alltt}
\end{kframe}\begin{figure}[H]

{\centering \includegraphics[width=\maxwidth]{figure/epv_example_1-1} 

}

\caption[Udonis Haslem with ball possession]{Udonis Haslem with ball possession. His four passing options are 1: Mario Chalmers, 2: Dwyane Wade, 3: LeBron James, 4: Chris Bosh.}\label{fig:epv_example_1}
\end{figure}

\begin{kframe}\begin{alltt}
\hlstd{draw}\hlopt{$}\hlstd{probs[}\hlnum{30}\hlstd{, ]}
\end{alltt}
\begin{verbatim}
##        pass1      pass2      pass3     pass4        make       miss         TO     other
## 30 0.5747731 0.02065291 0.01900831 0.1448298 0.000981215 0.00157953 0.01181597 0.2263592
\end{verbatim}
\begin{alltt}
\hlstd{draw}\hlopt{$}\hlstd{vals[}\hlnum{30}\hlstd{, ]}
\end{alltt}
\begin{verbatim}
##        pass1    pass2    pass3    pass4 make miss TO     other
## 30 0.9676442 1.017705 1.105218 1.014151    3 0.15  0 0.9707326
\end{verbatim}
\end{kframe}
\end{knitrout}

Because each EPV draw executes independent multiresolution transition simulations for each time point $t$, the resultant EPV curve is not very smooth. For instance, at time $t$, we might simulate a player driving toward the basket and attempting a layup, whereas at time $t+\epsilon$ we simulate the same player passing to a teammate. We see this below:

\begin{knitrout}\footnotesize
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{plot}\hlstd{(}\hlnum{720} \hlopt{-} \hlstd{tdat}\hlopt{$}\hlstd{game_clock[}\hlnum{1}\hlopt{:}\hlnum{100}\hlstd{], draw}\hlopt{$}\hlstd{epv[}\hlnum{1}\hlopt{:}\hlnum{100}\hlstd{],} \hlkwc{xlab}\hlstd{=}\hlstr{"game clock"}\hlstd{,} \hlkwc{ylab}\hlstd{=}\hlstr{"EPV"}\hlstd{)}
\end{alltt}
\end{kframe}\begin{figure}[H]

{\centering \includegraphics[width=\maxwidth]{figure/epv_draw_plot-1} 

}

\caption[EPV estimates of a single draw]{EPV estimates of a single draw}\label{fig:epv_draw_plot}
\end{figure}


\end{knitrout}

Of course, averaging over multiple EPV draws offers more smoothness---though by design, we see spikes in EPV exactly at moments when passes/shots/turnovers occur. The files \texttt{EPV\_draw.R} and \texttt{combine\_draws.R} execute independent EPV draws for this game on a computing cluster. We have combined 200 of these draws to obtain a final Monte Carlo EPV estimate (as well as Monte Carlo estimates of the transition probabilities and values). Below we load this, and merge these EPV estimates into the original full data set \texttt{dat}, where EPV is \texttt{NA} when the ball is not in the offensive halfcourt with the game clock moving. We also compute a ``smoothed EPV'' to (very slightly) interpolate the pointwise EPV estimates over time.

\begin{knitrout}\footnotesize
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{source}\hlstd{(}\hlkwd{sprintf}\hlstd{(}\hlstr{"%s/EPV_calcs.R"}\hlstd{, code.dir))}
\hlkwd{load}\hlstd{(}\hlkwd{sprintf}\hlstd{(}\hlstr{"%s/combined.epv.draws.Rdata"}\hlstd{, data.dir))}
\hlstd{e.dat} \hlkwb{<-} \hlkwd{combineDatEPV}\hlstd{(dat, epv.table)}
\end{alltt}
\end{kframe}
\end{knitrout}

We can now plot out EPV ``tickers'', as in Figure 2 of the paper:

\begin{knitrout}\footnotesize
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{par}\hlstd{(}\hlkwc{xpd}\hlstd{=}\hlnum{NA}\hlstd{,} \hlkwc{bty}\hlstd{=}\hlstr{"n"}\hlstd{,} \hlkwc{mfrow}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{1}\hlstd{,} \hlnum{2}\hlstd{))}
\hlstd{poss.1} \hlkwb{<-} \hlkwd{which}\hlstd{(e.dat}\hlopt{$}\hlstd{possID} \hlopt{==} \hlnum{1}\hlstd{)}
\hlkwd{plot}\hlstd{(}\hlnum{720} \hlopt{-} \hlstd{e.dat}\hlopt{$}\hlstd{game_clock[poss.1], e.dat}\hlopt{$}\hlstd{epv.smooth[poss.1],}
     \hlkwc{xlab}\hlstd{=}\hlstr{"game clock"}\hlstd{,} \hlkwc{ylab}\hlstd{=}\hlstr{"EPV"}\hlstd{,} \hlkwc{type}\hlstd{=}\hlstr{"l"}\hlstd{,} \hlkwc{lwd}\hlstd{=}\hlnum{2}\hlstd{,} \hlkwc{ylim}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{.5}\hlstd{,} \hlnum{1.5}\hlstd{))}
\hlkwd{points}\hlstd{(}\hlnum{720} \hlopt{-} \hlstd{e.dat}\hlopt{$}\hlstd{game_clock[poss.1], e.dat}\hlopt{$}\hlstd{epv[poss.1],} \hlkwc{pch}\hlstd{=}\hlnum{20}\hlstd{,} \hlkwc{cex}\hlstd{=}\hlnum{0.5}\hlstd{)}

\hlstd{poss.90} \hlkwb{<-} \hlkwd{which}\hlstd{(e.dat}\hlopt{$}\hlstd{possID} \hlopt{==} \hlnum{90}\hlstd{)} \hlcom{# possession shown in paper}
\hlkwd{plot}\hlstd{(}\hlnum{720} \hlopt{-} \hlstd{e.dat}\hlopt{$}\hlstd{game_clock[poss.90], e.dat}\hlopt{$}\hlstd{epv.smooth[poss.90],}
     \hlkwc{xlab}\hlstd{=}\hlstr{"game clock"}\hlstd{,} \hlkwc{ylab}\hlstd{=}\hlstr{"EPV"}\hlstd{,} \hlkwc{type}\hlstd{=}\hlstr{"l"}\hlstd{,} \hlkwc{lwd}\hlstd{=}\hlnum{2}\hlstd{,} \hlkwc{ylim}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{.5}\hlstd{,} \hlnum{1.5}\hlstd{))}
\hlkwd{points}\hlstd{(}\hlnum{720} \hlopt{-} \hlstd{e.dat}\hlopt{$}\hlstd{game_clock[poss.90], e.dat}\hlopt{$}\hlstd{epv[poss.90],} \hlkwc{pch}\hlstd{=}\hlnum{20}\hlstd{,} \hlkwc{cex}\hlstd{=}\hlnum{0.5}\hlstd{)}
\end{alltt}
\end{kframe}\begin{figure}[H]

{\centering \includegraphics[width=\maxwidth]{figure/epv_ticker_1-1} 

}

\caption[EPV curves for two possessions in this game]{EPV curves for two possessions in this game. The line slightly smooths the actual EPV values (dots).}\label{fig:epv_ticker_1}
\end{figure}


\end{knitrout}

One of the best ways to view EPV results is by generating gifs that show EPV curves side-by-side with the possession evolution. Below we've generated gifs for a pair of long and interesting-looking possessions (they're located in the \texttt{gifs} folder):

\begin{knitrout}\footnotesize
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{makeGIF}\hlstd{(e.dat,} \hlkwd{which}\hlstd{(e.dat}\hlopt{$}\hlstd{possID} \hlopt{==} \hlnum{12}\hlstd{),} \hlstr{"poss_12"}\hlstd{)} \hlcom{# takes a few minutes}
\hlkwd{makeGIF}\hlstd{(e.dat,} \hlkwd{which}\hlstd{(e.dat}\hlopt{$}\hlstd{possID} \hlopt{==} \hlnum{24}\hlstd{),} \hlstr{"poss_24"}\hlstd{)} \hlcom{# takes a few minutes}
\end{alltt}
\end{kframe}
\end{knitrout}

\subsection{Derived metrics}

The derived metrics presented in the paper, EPV-Added (EPVA) and shot satisfaction, are most meaningful when computed using a large sample of data, such as a full season. However, just as with any other basketball metric, we can calculate per-game versions of these statistics. For instance, to get these metrics for LeBron James and Deron Williams, we'd do:

\begin{knitrout}\footnotesize
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{id} \hlkwb{<-} \hlstd{players}\hlopt{$}\hlstd{player_id[}\hlkwd{grep}\hlstd{(}\hlstr{"LeBron"}\hlstd{, players}\hlopt{$}\hlstd{firstname)]}
\hlkwd{sum}\hlstd{(}\hlkwd{EPVA}\hlstd{(tdat, id))}       \hlcom{# sums EPV added on each touch}
\end{alltt}


{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in getHyperParams(id.dat): object 'inla.names' not found}}\begin{alltt}
\hlkwd{mean}\hlstd{(}\hlkwd{shotSatis}\hlstd{(tdat, id))} \hlcom{# averages shot satisfaction of each touch}
\end{alltt}


{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in getHyperParams(id.dat): object 'inla.names' not found}}\begin{alltt}
\hlstd{id} \hlkwb{<-} \hlstd{players}\hlopt{$}\hlstd{player_id[}\hlkwd{grep}\hlstd{(}\hlstr{"Deron"}\hlstd{, players}\hlopt{$}\hlstd{firstname)]} \hlcom{# Deron Williams}
\hlkwd{sum}\hlstd{(}\hlkwd{EPVA}\hlstd{(tdat, id))}       \hlcom{# sums EPV added on each touch}
\end{alltt}


{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in getHyperParams(id.dat): object 'inla.names' not found}}\begin{alltt}
\hlkwd{mean}\hlstd{(}\hlkwd{shotSatis}\hlstd{(tdat, id))} \hlcom{# averages shot satisfaction of each touch}
\end{alltt}


{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in getHyperParams(id.dat): object 'inla.names' not found}}\end{kframe}
\end{knitrout}

\section{Appendix}

Raw data \texttt{event\_id} codes:
\begin{table}[h!]
\begin{tabular}{lr|lr|lr|lr|lr}
\toprule
Event & ID & Event & ID & Event & ID & Event & ID & Event & ID \\
\midrule
FT Made &  1 & Def. Rebound &  6 & Timeout & 11 & Clock Sync & 16 & Dribble & 21 \\
FT Missed &  2 & Turnover &  7 & Jump Ball & 12 & Instant Replay & 17 & Pass & 22 \\
Shot Made &  3 & Foul  & 8 & Ejection & 13 & Replay Ruling & 18 & Possession & 23 \\
Shot Missed &  4 & Violation &  9 & Start Period & 14 & Game Over & 19 & Shot Block & 24 \\
Off. Rebound &  5 &
Substitution & 10 &
End Period & 15 &
Stoppage & 20 &
Assist & 25  \\
\bottomrule
\end{tabular}
\caption{Glossary of \texttt{event\_id} codes in optical tracking data.}
\label{tab:event_codes}
\end{table}

\end{document}