% EPV_demo.tex (forked from dcervone/EPVDemo)
\documentclass{article}\usepackage[]{graphicx}\usepackage[]{color}
% \maxwidth: the width passed to \includegraphics[width=\maxwidth] for the figures in this document.
% It expands to the graphic's natural width (\Gin@nat@width) when that is less than \linewidth,
% and to \linewidth otherwise, so that graphics never exceed the margin.
% \makeatletter/\makeatother bracket the definition because the name \Gin@nat@width contains @.
\makeatletter
\def\maxwidth{ %
\ifdim\Gin@nat@width>\linewidth
\linewidth
\else
\Gin@nat@width
\fi
}
\makeatother
% Colour and highlighting macros for the R code listings.
% fgcolor is the base text colour selected with \color{fgcolor} at the start of each listing.
% The roles below are read off from how each macro is applied in the listings of this file:
%   \hlnum  numeric literals
%   \hlstr  string literals
%   \hlcom  R comments (set in italics)
%   \hlopt  operators such as : and + (black)
%   \hlstd  plain identifiers and punctuation (same grey as fgcolor)
%   \hlkwa  control keywords: for, in, function (bold)
%   \hlkwb  the assignment arrow
%   \hlkwc  named-argument keys such as file and mfrow
%   \hlkwd  names of called functions (bold)
%   \hlipl  alias of \hlkwb (not used in the visible listings)
% The trailing comment character on each \newcommand line suppresses the end-of-line space.
\definecolor{fgcolor}{rgb}{0.345, 0.345, 0.345}
\newcommand{\hlnum}[1]{\textcolor[rgb]{0.686,0.059,0.569}{#1}}%
\newcommand{\hlstr}[1]{\textcolor[rgb]{0.192,0.494,0.8}{#1}}%
\newcommand{\hlcom}[1]{\textcolor[rgb]{0.678,0.584,0.686}{\textit{#1}}}%
\newcommand{\hlopt}[1]{\textcolor[rgb]{0,0,0}{#1}}%
\newcommand{\hlstd}[1]{\textcolor[rgb]{0.345,0.345,0.345}{#1}}%
\newcommand{\hlkwa}[1]{\textcolor[rgb]{0.161,0.373,0.58}{\textbf{#1}}}%
\newcommand{\hlkwb}[1]{\textcolor[rgb]{0.69,0.353,0.396}{#1}}%
\newcommand{\hlkwc}[1]{\textcolor[rgb]{0.333,0.667,0.333}{#1}}%
\newcommand{\hlkwd}[1]{\textcolor[rgb]{0.737,0.353,0.396}{\textbf{#1}}}%
\let\hlipl\hlkwb
% kframe: the shaded box that wraps the code listings and R messages in this document.
% It is built on \MakeFramed ... \endMakeFramed (presumably supplied by the framed package loaded here).
%  - \at@end@of@kframe is a hook executed after the frame is closed; it is empty by default.
%  - In inner horizontal mode (\ifinner\ifhmode) the content is first wrapped in a minipage of
%    width \columnwidth, and the hook is redefined to close that minipage.
%  - \FrameCommand places the content in \colorbox{shadecolor}, preceded by
%    \hskip\@totalleftmargin\hskip-\fboxsep and followed by \hskip-\fboxsep plus a sequence of
%    skips that stands in for the non-existent right-margin counterpart of \@totalleftmargin.
%  - The argument of \MakeFramed reduces \hsize by \width, sets \@totalleftmargin to zero and
%    \linewidth to the reduced \hsize, then calls \@setminipage.
\usepackage{framed}
\makeatletter
\newenvironment{kframe}{%
\def\at@end@of@kframe{}%
\ifinner\ifhmode%
\def\at@end@of@kframe{\end{minipage}}%
\begin{minipage}{\columnwidth}%
\fi\fi%
\def\FrameCommand##1{\hskip\@totalleftmargin \hskip-\fboxsep
\colorbox{shadecolor}{##1}\hskip-\fboxsep
% There is no \\@totalrightmargin, so:
\hskip-\linewidth \hskip-\@totalleftmargin \hskip\columnwidth}%
\MakeFramed {\advance\hsize-\width
\@totalleftmargin\z@ \linewidth\hsize
\@setminipage}}%
{\par\unskip\endMakeFramed%
\at@end@of@kframe}
\makeatother
% Colours used by the code-output markup:
%   shadecolor   background of the kframe box (redefined locally before each listing)
%   errorcolor   colour applied to R error messages printed in this document
%   messagecolor, warningcolor   defined alongside; not referenced in the visible part of the file
% knitrout is the outer wrapper around every listing/figure chunk; it does nothing by default.
\definecolor{shadecolor}{rgb}{.97, .97, .97}
\definecolor{messagecolor}{rgb}{0, 0, 0}
\definecolor{warningcolor}{rgb}{1, 0, 1}
\definecolor{errorcolor}{rgb}{1, 0, 0}
\newenvironment{knitrout}{}{} % an empty environment to be redefined in TeX
\usepackage{alltt}
\usepackage{fullpage}
\usepackage{booktabs}
\usepackage{amsthm,amsmath,amssymb}
\usepackage{float}
\RequirePackage{natbib}
\usepackage{graphicx}
\title{EPV Demo \\
\Large
Supplement to ``A Multiresolution Stochastic Process Model for Predicting Basketball Possession Outcomes''}
\author{Daniel Cervone, Alex D'Amour, Luke Bornn and Kirk Goldsberry}
\date{}
\IfFileExists{upquote.sty}{\usepackage{upquote}}{}
\begin{document}
\maketitle
This document provides a demonstration of the code, methodology, and inferential results for the EPV model discussed in our paper.
\section{Loading the data}
To begin, we must first set the directories containing the supplemental data and code, and install/load all necessary packages.
\begin{knitrout}\footnotesize
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{code.dir} \hlkwb{<-} \hlstr{"./code"}
\hlstd{data.dir} \hlkwb{<-} \hlstr{"./data"}
\end{alltt}
\end{kframe}
\end{knitrout}
Now we load the \texttt{csv} file containing a full game of optical tracking data. As mentioned in the paper, data from this game was not used in parameter inference for any model related to EPV.
\begin{knitrout}\footnotesize
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{dat} \hlkwb{<-} \hlkwd{read.csv}\hlstd{(}\hlkwc{file}\hlstd{=}\hlkwd{sprintf}\hlstd{(}\hlstr{"%s/2013_11_01_MIA_BKN.csv"}\hlstd{, data.dir))}
\end{alltt}
\end{kframe}
\end{knitrout}
Each row of \texttt{dat} represents a time point (sampled 25 times per second), and its columns include the variables described in Table~\ref{tab:data_desc}.
\begin{table}[!h]
\begin{center}
\begin{tabular}{r|ll}
\toprule
Column & Value & Notes \\
\midrule
\texttt{time} & Real time (ms) & \\
\texttt{game} & Game ID & \\
\texttt{quarter} & Quarter & \\
\texttt{shot\_clock} & Time remaining on shot clock & \texttt{NA} for this game \\
\texttt{game\_clock} & Time remaining in quarter (s) & \\
\texttt{x, y, z} & Ball position (ft) & Court region is $[0, 94] \times [0, 50]$ \\
\texttt{a1\_ent} & ID number of player 1 on away team (\texttt{a1}) & \\
\texttt{a1\_x, a1\_y} & Position of \texttt{a1} & \\
\texttt{a1\_event} & Event code for player \texttt{a1} & See Table \ref{tab:event_codes} for reference \\
\texttt{a\#\_*, h\#\_*} & As for \texttt{a1} & \\
\bottomrule
\end{tabular}
\caption{Description of variables in optical tracking data sample.}
\label{tab:data_desc}
\end{center}
\end{table}
Let's plot the data for some arbitrary moment in the game in Figure \ref{fig:plot_data}.
\begin{knitrout}\footnotesize
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{source}\hlstd{(}\hlkwd{sprintf}\hlstd{(}\hlstr{"%s/constants.R"}\hlstd{, code.dir))} \hlcom{# loads libraries and constants used throughout code}
\end{alltt}
\begin{verbatim}
## Warning in install.packages :
## unable to access index for repository http://www.math.ntnu.no/inla/R/stable/src/contrib:
## cannot open URL 'http://www.math.ntnu.no/inla/R/stable/src/contrib/PACKAGES'
## Warning in install.packages :
## package 'INLA' is not available for this version of R
##
## A version of this package for your version of R might be available elsewhere,
## see the ideas at
## https://cran.r-project.org/doc/manuals/r-patched/R-admin.html#Installing-packages
## Warning in install.packages :
## unable to access index for repository http://www.math.ntnu.no/inla/R/stable/bin/macosx/contrib/4.0:
## cannot open URL 'http://www.math.ntnu.no/inla/R/stable/bin/macosx/contrib/4.0/PACKAGES'
##
## The downloaded binary packages are in
## /var/folders/zl/8xhj8rts3yxc506fzd8yw9th0000gp/T//Rtmpn82Yzg/downloaded_packages
\end{verbatim}
{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in library(INLA): there is no package called 'INLA'}}\begin{alltt}
\hlkwd{source}\hlstd{(}\hlkwd{sprintf}\hlstd{(}\hlstr{"%s/graphics.R"}\hlstd{, code.dir))} \hlcom{# graphics/plotting functions}
\hlkwd{par}\hlstd{(}\hlkwc{mar}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{0}\hlstd{,} \hlnum{0}\hlstd{,} \hlnum{0}\hlstd{,} \hlnum{0}\hlstd{))}
\hlkwd{data.plotter}\hlstd{(dat,} \hlnum{1800}\hlstd{)}
\end{alltt}
\end{kframe}\begin{figure}[H]
{\centering \includegraphics[width=\maxwidth]{figure/plot_data-1}
}
\caption[Plotting a single moment of optical tracking data]{Plotting a single moment of optical tracking data.}\label{fig:plot_data}
\end{figure}
\end{knitrout}
\subsection{Transformed data}
In this format, the data lacks information necessary for computing EPV. Most importantly, the identity of the ballcarrier is not labeled, and must be inferred from the record of game actions (and positional data). We also need to record the covariates used by our multiresolution transition models, and perform some simple data manipulations, such as rotating all data to the offensive half-court and removing moments where the gameplay is suspended. The following code performs these data tasks:
\begin{knitrout}\footnotesize
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{source}\hlstd{(}\hlkwd{sprintf}\hlstd{(}\hlstr{"%s/data_formatting.R"}\hlstd{, code.dir))}
\hlkwd{source}\hlstd{(}\hlkwd{sprintf}\hlstd{(}\hlstr{"%s/covariates.R"}\hlstd{, code.dir))}
\hlstd{poss} \hlkwb{<-} \hlkwd{possession.indicator}\hlstd{(dat)} \hlcom{# infer ballcarrier... takes about a minute}
\hlstd{tdat} \hlkwb{<-} \hlkwd{rearrange.data}\hlstd{(dat, poss)} \hlcom{# re-shuffle columns by to ballcarrier... (2 min)}
\hlstd{tdat} \hlkwb{<-} \hlkwd{offensive.halfcourt}\hlstd{(tdat)} \hlcom{# transforming to offensive halfcourt}
\hlstd{tdat} \hlkwb{<-} \hlkwd{offensive.ballcarrier}\hlstd{(tdat)}
\hlstd{touchID} \hlkwb{<-} \hlkwd{get.touchID}\hlstd{(tdat)}
\hlstd{covariates} \hlkwb{<-} \hlkwd{getAllCovars}\hlstd{(tdat)} \hlcom{# get covariates... (3 min)}
\hlstd{tdat} \hlkwb{<-} \hlkwd{data.frame}\hlstd{(tdat,} \hlkwc{touchID}\hlstd{=touchID, covariates)}
\hlkwd{save}\hlstd{(tdat,} \hlkwc{file}\hlstd{=}\hlkwd{sprintf}\hlstd{(}\hlstr{"%s/tdat.Rdata"}\hlstd{, data.dir))}
\end{alltt}
\end{kframe}
\end{knitrout}
Or, since this takes a few minutes to complete, it may be easier to load a pre-computed version of the transformed data set, \texttt{tdat}:
\begin{knitrout}\footnotesize
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{load}\hlstd{(}\hlkwd{sprintf}\hlstd{(}\hlstr{"%s/tdat.Rdata"}\hlstd{, data.dir))}
\end{alltt}
\end{kframe}
\end{knitrout}
\section{Components of hierarchical models}
\subsection{Player similarity adjacency matrix, $\mathbf{H}$}
The hierarchical models used to estimate parameters for the multiresolution transition models rely on preprocessed data summaries. First, the conditional autoregressive model priors used for many model parameters rely on a graph $\mathbf{H}$ of player similarity, represented using an adjacency matrix. As discussed in the paper, this graph is constructed based on the similarity in players' court occupancy distributions. We can visualize these court occupancy distributions, as well as the similarity scores we calculate between them.
\begin{knitrout}\footnotesize
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{load}\hlstd{(}\hlkwd{sprintf}\hlstd{(}\hlstr{"%s/playerbases.Rdata"}\hlstd{, data.dir))}
\hlstd{players} \hlkwb{<-} \hlkwd{read.csv}\hlstd{(}\hlkwd{sprintf}\hlstd{(}\hlstr{"%s/players2013.csv"}\hlstd{, data.dir))}
\hlkwd{head}\hlstd{(players)}
\end{alltt}
\begin{verbatim}
## player_id firstname lastname position height weight byear rookie position.number
## 1 3306 Elton Brand Forward-Center 81 254 1979 1999 8
## 2 58293 Kyle Korver Guard-Forward 79 212 1981 2003 4
## 3 292401 Lou Williams Guard 73 175 1986 2005 2
## 4 237675 Paul Millsap Forward-Center 80 258 1985 2006 8
## 5 280587 Al Horford Forward-Center 82 250 1986 2007 8
## 6 398043 Jeff Teague Point-Guard 74 181 1988 2009 1
\end{verbatim}
\end{kframe}
\end{knitrout}
\texttt{players} is a directory of the 461 NBA players in the 2013--14 season, and \texttt{playerbases.Rdata} contains summaries of their court occupancy patterns. \texttt{df} is the matrix $\mathbf{G}$ from the paper; plotting its rows reveals stark differences in players' spatial occupancy patterns:
\begin{knitrout}\footnotesize
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{par}\hlstd{(}\hlkwc{mfrow}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{1}\hlstd{,}\hlnum{5}\hlstd{))}
\hlkwa{for}\hlstd{(i} \hlkwa{in} \hlnum{1}\hlopt{:}\hlnum{5}\hlstd{)}
\hlkwd{spatialPlot0}\hlstd{(df[i, ],} \hlkwc{legend}\hlstd{=F)}
\end{alltt}
\end{kframe}\begin{figure}[H]
{\centering \includegraphics[width=\maxwidth]{figure/plot_occupancy-1}
}
\caption[Court occupancy distributions]{Court occupancy distributions.}\label{fig:plot_occupancy}
\end{figure}
\end{knitrout}
In the paper, we use non-negative matrix factorization to obtain a rank 5 approximation of the court occupancy distribution matrix. The basis surfaces of this approximation, given in Figure 8 of the paper, are reproduced here:
\begin{knitrout}\footnotesize
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{par}\hlstd{(}\hlkwc{mfrow}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{1}\hlstd{,}\hlnum{5}\hlstd{))}
\hlkwa{for}\hlstd{(i} \hlkwa{in} \hlnum{1}\hlopt{:}\hlnum{5}\hlstd{)}
\hlkwd{spatialPlot0}\hlstd{(nmf.basis[i, ],} \hlkwc{legend}\hlstd{=F)}
\end{alltt}
\end{kframe}\begin{figure}[H]
{\centering \includegraphics[width=\maxwidth]{figure/plot_occupancy_bases-1}
}
\caption[Court occupancy distribution bases]{Court occupancy distribution bases.}\label{fig:plot_occupancy_bases}
\end{figure}
\end{knitrout}
Projected onto this basis, the court occupancy distributions shown in Figure \ref{fig:plot_occupancy} look like:
\begin{knitrout}\footnotesize
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{df.lowrank} \hlkwb{<-} \hlstd{nmf.coef} \hlopt{%*%} \hlstd{nmf.basis}
\hlkwd{par}\hlstd{(}\hlkwc{mfrow}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{1}\hlstd{,}\hlnum{5}\hlstd{))}
\hlkwa{for}\hlstd{(i} \hlkwa{in} \hlnum{1}\hlopt{:}\hlnum{5}\hlstd{)}
\hlkwd{spatialPlot0}\hlstd{(df.lowrank[i, ],} \hlkwc{legend}\hlstd{=F)}
\end{alltt}
\end{kframe}\begin{figure}[H]
{\centering \includegraphics[width=\maxwidth]{figure/appx_occupancy-1}
}
\caption{Low rank court occupancy distributions for players shown in Figure \ref{fig:plot_occupancy}.}\label{fig:appx_occupancy}
\end{figure}
\end{knitrout}
It's better to compute player similarity using distance in the space of basis loadings, rather than the original court occupancy distributions, as such distances are calculated across axes that best describe player variation. We calculate \texttt{K}, a distance matrix comparing the loadings for the court occupancy distributions of all 461 players, then map this to a symmetric adjacency matrix \texttt{H} based on finding each player's closest eight neighbors:
\begin{knitrout}\footnotesize
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{K} \hlkwb{<-} \hlkwd{matrix}\hlstd{(}\hlnum{NA}\hlstd{,} \hlkwc{nrow}\hlstd{=}\hlkwd{nrow}\hlstd{(df),} \hlkwc{ncol}\hlstd{=}\hlkwd{nrow}\hlstd{(df))}
\hlkwa{for}\hlstd{(i} \hlkwa{in} \hlnum{1}\hlopt{:}\hlkwd{nrow}\hlstd{(K))\{}
\hlstd{this.coef} \hlkwb{<-} \hlstd{nmf.coef[i, ]} \hlopt{/} \hlkwd{sum}\hlstd{(nmf.coef[i, ])}
\hlstd{K[i, ]} \hlkwb{<-} \hlkwd{apply}\hlstd{(nmf.coef,} \hlnum{1}\hlstd{,} \hlkwa{function}\hlstd{(}\hlkwc{r}\hlstd{)} \hlkwd{sum}\hlstd{((r} \hlopt{/} \hlkwd{sum}\hlstd{(r)} \hlopt{-} \hlstd{this.coef)}\hlopt{^}\hlnum{2}\hlstd{))}
\hlstd{\}}
\hlstd{H} \hlkwb{<-} \hlnum{0} \hlopt{*} \hlstd{K}
\hlkwa{for}\hlstd{(i} \hlkwa{in} \hlnum{1}\hlopt{:}\hlkwd{nrow}\hlstd{(H))\{}
\hlstd{inds} \hlkwb{<-} \hlkwd{order}\hlstd{(K[i, ])[}\hlnum{1}\hlopt{:}\hlnum{8} \hlopt{+} \hlnum{1}\hlstd{]}
\hlstd{H[i,inds]} \hlkwb{<-} \hlstd{H[inds, i]} \hlkwb{<-} \hlnum{1}
\hlstd{\}}
\end{alltt}
\end{kframe}
\end{knitrout}
To check any player's ``neighbors'' according to \texttt{H}, we can do (for Al Horford):
\begin{knitrout}\footnotesize
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{this.player} \hlkwb{<-} \hlkwd{grep}\hlstd{(}\hlstr{"Horford"}\hlstd{, players}\hlopt{$}\hlstd{lastname)}
\hlkwd{paste}\hlstd{(players}\hlopt{$}\hlstd{firstname, players}\hlopt{$}\hlstd{lastname)[}\hlkwd{which}\hlstd{(H[this.player, ]} \hlopt{==} \hlnum{1}\hlstd{)]}
\end{alltt}
\begin{verbatim}
## [1] "Brandon Bass" "J.J. Hickson" "Andre Drummond" "Tony Mitchell"
## [5] "David Lee" "Dwight Howard" "Blake Griffin" "Zach Randolph"
## [9] "Anthony Davis" "Amar'e Stoudemire" "Jason Maxiell" "Glen Davis"
## [13] "DeMarcus Cousins" "Jonas Valanciunas" "Enes Kanter"
\end{verbatim}
\end{kframe}
\end{knitrout}
\subsection{Spatial effect basis functions}
Similarly, let's load the basis functions that are used in representing the spatial effects in players' macrotransition entry models: we denote these basis functions $\phi_{ji}$, where $i=1, \ldots, 10$, and $j$ indexes shot-taking, four different pass options, and turnovers (recall that for the spatial effects in the shot probability model (Equation 10 in the paper), we use the same basis functions as we do for the shot-taking hazard model). To recreate Figure 6 of the paper, which plots the shot-taking bases, we'd do:
\begin{knitrout}\footnotesize
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{par}\hlstd{(}\hlkwc{mfrow} \hlstd{=} \hlkwd{c}\hlstd{(}\hlnum{2}\hlstd{,}\hlnum{5}\hlstd{))}
\hlkwa{for}\hlstd{(i} \hlkwa{in} \hlnum{1}\hlopt{:}\hlnum{10}\hlstd{)}
\hlkwd{spatialPlot1}\hlstd{(take.basis[i, ],} \hlkwc{legend}\hlstd{=F)}
\end{alltt}
{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in which(mesh\$loc[, 1] >= 0 \& mesh\$loc[, 1] <= 47 \& mesh\$loc[, 2] >= : object 'mesh' not found}}\end{kframe}
\end{knitrout}
\section{Loading parameters and model estimates}
\subsection{Microtransition model}
Here, we will load and illustrate the results of the multiresolution transition models discussed in Section 3 of the paper. First, let's load the (offensive) microtransition model output for LeBron James, print the parameter estimates, and plot the acceleration effects $\mu^{\ell}_x, \mu^{\ell}_y$, as in Figure 4 of the paper.
\begin{footnotesize}
\begin{kframe}
\begin{alltt}
\hlstd{player.id} \hlkwb{<-} \hlstd{players}\hlopt{$}\hlstd{player_id[}\hlkwd{which}\hlstd{(players}\hlopt{$}\hlstd{firstname} \hlopt{==} \hlstr{"LeBron"}\hlstd{)]}
\hlkwd{load}\hlstd{(}\hlkwd{sprintf}\hlstd{(}\hlstr{"%s/micros/%s.Rdata"}\hlstd{, data.dir, player.id))}
\hlcom{# x component of LeBron James' micro model during ball possession}
\hlkwd{xtable}\hlstd{(with.ball}\hlopt{$}\hlstd{io.x}\hlopt{$}\hlstd{summary.fixed[,} \hlnum{1}\hlopt{:}\hlnum{5}\hlstd{])}
\end{alltt}
\end{kframe}% latex table generated in R 4.0.3 by xtable 1.8-4 package
% Sun Jan 31 15:13:35 2021
\begin{table}[ht]
\centering
\begin{tabular}{rrrrrr}
\hline
& mean & sd & 0.025quant & 0.5quant & 0.975quant \\
\hline
dif & 0.98 & 0.00 & 0.98 & 0.98 & 0.98 \\
intercept & 0.00 & 0.01 & -0.03 & 0.00 & 0.03 \\
\hline
\end{tabular}
\end{table}
\end{footnotesize}
\begin{knitrout}\footnotesize
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{par}\hlstd{(}\hlkwc{mfrow}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{1}\hlstd{,}\hlnum{2}\hlstd{),} \hlkwc{mar}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{0}\hlstd{,}\hlnum{0}\hlstd{,}\hlnum{0}\hlstd{,}\hlnum{0}\hlstd{))}
\hlkwd{vectorPlot}\hlstd{(with.ball)}
\end{alltt}
{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in inla.mesh.projector(mesh, dims = c(40, 40)): could not find function "{}inla.mesh.projector"{}}}\begin{alltt}
\hlkwd{vectorPlot}\hlstd{(without.ball)}
\end{alltt}
{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in inla.mesh.projector(mesh, dims = c(40, 40)): could not find function "{}inla.mesh.projector"{}}}\end{kframe}
\end{knitrout}
The defensive microtransition model is less complicated, and we can fit it very quickly. The code below estimates the same model parameters for all players on defense:
\begin{footnotesize}
\begin{kframe}
\begin{alltt}
\hlkwd{source}\hlstd{(}\hlkwd{sprintf}\hlstd{(}\hlstr{"%s/parameters.R"}\hlstd{, code.dir))} \hlcom{# loads many modeling functions}
\hlstd{def.micro} \hlkwb{<-} \hlkwd{microDefModel}\hlstd{(tdat)}
\hlcom{# coefficients are a_x, c_x, and b_x from Equation 6 in paper}
\hlkwd{xtable}\hlstd{(}\hlkwd{summary}\hlstd{(def.micro}\hlopt{$}\hlstd{mod.x)}\hlopt{$}\hlstd{coef[,} \hlnum{1}\hlopt{:}\hlnum{3}\hlstd{])}
\end{alltt}
\end{kframe}% latex table generated in R 4.0.3 by xtable 1.8-4 package
% Sun Jan 31 15:13:39 2021
\begin{table}[ht]
\centering
\begin{tabular}{rrrr}
\hline
& Estimate & Std. Error & t value \\
\hline
(Intercept) & -0.00 & 0.00 & -29.74 \\
def.eps.x[-length(def.eps.x)] & 0.96 & 0.00 & 1315.54 \\
residual.x[-length(residual.x)] & -0.00 & 0.00 & -27.12 \\
opt.eps.x[-length(opt.eps.x)] & 0.00 & 0.00 & 3.99 \\
\hline
\end{tabular}
\end{table}
\end{footnotesize}
\subsection{Macrotransition entry models}
We have six macrotransition entry models (from Section 3.2 of the paper). Each is fit hierarchically for all players in the NBA using the R-INLA software, as discussed in Section 4 of the paper. Let's load the results of the shot-taking macrotransition entry model, and interpret some of the results.
\begin{footnotesize}
\begin{kframe}
\begin{alltt}
\hlkwd{load}\hlstd{(}\hlkwd{sprintf}\hlstd{(}\hlstr{"%s/INLA_TAKE.Rdata"}\hlstd{, data.dir))}
\hlcom{# coefficients for time-varying covariates in shot-taking hazard model}
\hlkwd{xtable}\hlstd{(inla.out}\hlopt{$}\hlstd{summary.fixed[,} \hlnum{1}\hlopt{:}\hlnum{2}\hlstd{])}
\end{alltt}
\end{kframe}% latex table generated in R 4.0.3 by xtable 1.8-4 package
% Sun Jan 31 15:13:47 2021
\begin{table}[ht]
\centering
\begin{tabular}{rrr}
\hline
& mean & sd \\
\hline
(Intercept) & -3.30 & 0.63 \\
dribble & -0.32 & 0.01 \\
ndef & -0.08 & 0.01 \\
ball.lastsec & 0.06 & 0.00 \\
b1 & 1.79 & 0.63 \\
b2 & -1.62 & 0.63 \\
b3 & -0.52 & 0.64 \\
b4 & 0.82 & 0.63 \\
b5 & -6.80 & 0.64 \\
b6 & -1.60 & 0.64 \\
b7 & -3.25 & 0.63 \\
b8 & -2.89 & 0.64 \\
b9 & -3.62 & 0.63 \\
b10 & -0.80 & 0.64 \\
\hline
\end{tabular}
\end{table}
\end{footnotesize}
\texttt{b1} is the coefficient for the loading on the first basis function (Figure \ref{fig:shot_bases}). These are fixed effects, so that player-specific coefficient values are represented as random effects. Parameter inference for the random effects is presented somewhat confusingly in the output from R-INLA. Inference for random effects on the situational covariates is stored in matrices where rows represent different players. For instance, for Chris Bosh, we get the mean, SD, and quantiles of his player-specific \texttt{dribble} parameter\footnote{See Appendix A.1 of the paper for explanations of the meaning of the covariates used.} by running:
\begin{footnotesize}
\begin{kframe}
\begin{alltt}
\hlstd{this.player} \hlkwb{<-} \hlkwd{grep}\hlstd{(}\hlstr{"Bosh"}\hlstd{, players}\hlopt{$}\hlstd{lastname)}
\hlkwd{xtable}\hlstd{(inla.out}\hlopt{$}\hlstd{summary.random}\hlopt{$}\hlstd{p.dribble[this.player,} \hlnum{2}\hlopt{:}\hlnum{6}\hlstd{])}
\end{alltt}
\end{kframe}% latex table generated in R 4.0.3 by xtable 1.8-4 package
% Sun Jan 31 15:13:47 2021
\begin{table}[ht]
\centering
\begin{tabular}{rrrrrr}
\hline
& mean & sd & 0.025quant & 0.5quant & 0.975quant \\
\hline
237 & 0.31 & 0.09 & 0.14 & 0.31 & 0.49 \\
\hline
\end{tabular}
\end{table}
\end{footnotesize}
However, the random effects on the spatial basis coefficients are stacked in a single matrix with $(1 + 10) \times 461$ rows (there are 461 players in our full NBA data): 11 submatrices of 461 rows each give the random effects on the intercept and on each of the 10 basis function coefficients, in order. This matrix is copied across all $11$ corresponding output fields in the \texttt{inla.out\$summary.random} object:
\begin{footnotesize}
\begin{kframe}
\begin{alltt}
\hlstd{n.player} \hlkwb{<-} \hlkwd{nrow}\hlstd{(players)}
\hlcom{# inference for Chris Bosh's intercept and first basis coefficient}
\hlkwd{xtable}\hlstd{(inla.out}\hlopt{$}\hlstd{summary.random}\hlopt{$}\hlstd{p.int[this.player} \hlopt{+} \hlnum{0}\hlopt{:}\hlnum{1}\hlstd{,} \hlnum{2}\hlopt{:}\hlnum{6}\hlstd{])}
\end{alltt}
\end{kframe}% latex table generated in R 4.0.3 by xtable 1.8-4 package
% Sun Jan 31 15:13:47 2021
\begin{table}[ht]
\centering
\begin{tabular}{rrrrrr}
\hline
& mean & sd & 0.025quant & 0.5quant & 0.975quant \\
\hline
237 & -0.02 & 0.47 & -0.95 & -0.02 & 0.90 \\
238 & -0.56 & 0.49 & -1.52 & -0.56 & 0.40 \\
\hline
\end{tabular}
\end{table}
\begin{kframe}\begin{alltt}
\hlkwd{xtable}\hlstd{(inla.out}\hlopt{$}\hlstd{summary.random}\hlopt{$}\hlstd{p.b1[this.player} \hlopt{+} \hlnum{0}\hlopt{:}\hlnum{1}\hlstd{,} \hlnum{2}\hlopt{:}\hlnum{6}\hlstd{])} \hlcom{# identical}
\end{alltt}
\end{kframe}% latex table generated in R 4.0.3 by xtable 1.8-4 package
% Sun Jan 31 15:13:47 2021
\begin{table}[ht]
\centering
\begin{tabular}{rrrrrr}
\hline
& mean & sd & 0.025quant & 0.5quant & 0.975quant \\
\hline
237 & -0.02 & 0.47 & -0.95 & -0.02 & 0.90 \\
238 & -0.56 & 0.49 & -1.52 & -0.56 & 0.40 \\
\hline
\end{tabular}
\end{table}
\end{footnotesize}
The following code rearranges the output into a single matrix, with each row giving the player-specific parameters' posterior mean (fixed $+$ random effects) for all model components (situational covariates and spatial effects).
\begin{knitrout}\footnotesize
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{param.names} \hlkwb{<-} \hlkwd{row.names}\hlstd{(inla.out}\hlopt{$}\hlstd{summary.fixed)}
\hlstd{n} \hlkwb{<-} \hlkwd{nrow}\hlstd{(players)}
\hlstd{player.params} \hlkwb{<-} \hlkwd{matrix}\hlstd{(}\hlnum{NA}\hlstd{,} \hlkwc{nrow}\hlstd{=n,} \hlkwc{ncol}\hlstd{=}\hlkwd{length}\hlstd{(param.names))}
\hlstd{y.fix} \hlkwb{<-} \hlstd{inla.out}\hlopt{$}\hlstd{summary.fixed[,} \hlstr{"mean"}\hlstd{]} \hlcom{# fixed effects}
\hlstd{temp} \hlkwb{<-} \hlkwd{names}\hlstd{(inla.out}\hlopt{$}\hlstd{summary.random)}
\hlstd{basis.inds} \hlkwb{<-} \hlkwd{c}\hlstd{(}\hlkwd{which}\hlstd{(temp} \hlopt{==} \hlstr{"p.int"}\hlstd{),} \hlkwd{grep}\hlstd{(}\hlstr{"p.b[0-9][0-9]*"}\hlstd{, temp))}
\hlstd{cov.inds} \hlkwb{<-} \hlkwd{setdiff}\hlstd{(}\hlkwd{seq}\hlstd{(}\hlkwd{length}\hlstd{(inla.out}\hlopt{$}\hlstd{summary.random)), basis.inds)}
\hlkwa{for}\hlstd{(pl} \hlkwa{in} \hlnum{1}\hlopt{:}\hlstd{n) \{}
\hlcom{# add players' random effects to fixed effects}
\hlstd{y.rand} \hlkwb{<-} \hlkwd{c}\hlstd{(inla.out}\hlopt{$}\hlstd{summary.random}\hlopt{$}\hlstd{p.int[pl,} \hlstr{"mean"}\hlstd{],}
\hlkwd{sapply}\hlstd{(cov.inds,}
\hlkwa{function}\hlstd{(}\hlkwc{k}\hlstd{) inla.out}\hlopt{$}\hlstd{summary.random[[k]][pl,} \hlstr{"mean"}\hlstd{]),}
\hlstd{inla.out}\hlopt{$}\hlstd{summary.random}\hlopt{$}\hlstd{p.b1[pl} \hlopt{+} \hlstd{n} \hlopt{*} \hlstd{(}\hlnum{1}\hlopt{:}\hlstd{n.basis),} \hlstr{"mean"}\hlstd{])}
\hlstd{player.params[pl, ]} \hlkwb{<-} \hlstd{y.fix} \hlopt{+} \hlstd{y.rand}
\hlstd{\}}
\end{alltt}
{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in `[.data.frame`(inla.out\$summary.random\$p.b1, pl + n * (1:n.basis), : object 'n.basis' not found}}\end{kframe}
\end{knitrout}
For Chris Bosh, for instance, we can view his parameter estimates and see where each ranks relative to the rest of the league:
\begin{footnotesize}
\begin{kframe}
\begin{alltt}
\hlstd{values} \hlkwb{<-} \hlstd{player.params[this.player, ]}
\hlstd{ranks} \hlkwb{<-} \hlkwd{apply}\hlstd{(player.params,} \hlnum{2}\hlstd{,} \hlkwa{function}\hlstd{(}\hlkwc{col}\hlstd{)} \hlkwd{rank}\hlstd{(col)[this.player])} \hlcom{# increasing order}
\hlkwd{xtable}\hlstd{(}\hlkwd{data.frame}\hlstd{(param.names, values, ranks),} \hlkwc{digits}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{0}\hlstd{,}\hlnum{0}\hlstd{,}\hlnum{2}\hlstd{,}\hlnum{0}\hlstd{))}
\end{alltt}
\end{kframe}% latex table generated in R 4.0.3 by xtable 1.8-4 package
% Sun Jan 31 15:13:47 2021
\begin{table}[ht]
\centering
\begin{tabular}{rllr}
\hline
& param.names & values & ranks \\
\hline
1 & (Intercept) & & 237 \\
2 & dribble & & 237 \\
3 & ndef & & 237 \\
4 & ball.lastsec & & 237 \\
5 & b1 & & 237 \\
6 & b2 & & 237 \\
7 & b3 & & 237 \\
8 & b4 & & 237 \\
9 & b5 & & 237 \\
10 & b6 & & 237 \\
11 & b7 & & 237 \\
12 & b8 & & 237 \\
13 & b9 & & 237 \\
14 & b10 & & 237 \\
\hline
\end{tabular}
\end{table}
\end{footnotesize}
The most notable values here are a small \texttt{b1} coefficient relative to the rest of the league, and a large \texttt{b5}. Referring to Figure \ref{fig:shot_bases}, we see that this means his shot-taking hazard is relatively low in the right-handed layup area, and relatively high in three-point range. This suggests that, adjusting for his baseline shooting rate (\texttt{intercept}) and other situational covariates, Bosh attempts threes at a high rate (per time controlling the ball from three-point range), and right-handed layups/dunks at a low rate. This behavior is generally shared among other stretch-4 type players who are catch-and-shoot three-point shooters, and whose touches near the basket come more from slow-developing plays or those that don't lead to shots---like ``isolations'' or offensive rebounds---than from layups or attacking (also, note that Bosh is left-handed). For instance, players such as Kevin Love and Dirk Nowitzki exhibit similar behavior.
Analogous to Figure 5 in the paper, we can plot players' spatial effect surfaces. It is also helpful to plot only the random effects, to see where players' spatial tendencies differ from typical league behavior. For Chris Bosh's shot-taking hazard, we get these side-by-side with:
\begin{knitrout}\footnotesize
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{vars} \hlkwb{<-} \hlkwd{paste0}\hlstd{(}\hlstr{"b"}\hlstd{,} \hlkwd{seq}\hlstd{(n.basis))}
\end{alltt}
{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in seq(n.basis): object 'n.basis' not found}}\begin{alltt}
\hlstd{spat.fixed} \hlkwb{<-} \hlkwd{as.numeric}\hlstd{(inla.out}\hlopt{$}\hlstd{summary.fixed[}\hlstr{"(Intercept)"}\hlstd{,} \hlstr{"mean"}\hlstd{]} \hlopt{+}
\hlkwd{t}\hlstd{(take.basis)} \hlopt{%*%} \hlstd{inla.out}\hlopt{$}\hlstd{summary.fixed[vars,} \hlstr{"mean"}\hlstd{])}
\end{alltt}
{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in t(take.basis): object 'take.basis' not found}}\begin{alltt}
\hlstd{spat.random} \hlkwb{<-} \hlkwd{as.numeric}\hlstd{(inla.out}\hlopt{$}\hlstd{summary.random}\hlopt{$}\hlstd{p.int[this.player,} \hlstr{"mean"}\hlstd{]} \hlopt{+}
\hlkwd{t}\hlstd{(take.basis)} \hlopt{%*%} \hlstd{inla.out}\hlopt{$}\hlstd{summary.random}\hlopt{$}\hlstd{p.int[this.player} \hlopt{+} \hlstd{n} \hlopt{*} \hlstd{(}\hlnum{1}\hlopt{:}\hlstd{n.basis),} \hlstr{"mean"}\hlstd{])}
\end{alltt}
{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in t(take.basis): object 'take.basis' not found}}\begin{alltt}
\hlkwd{par}\hlstd{(}\hlkwc{mfrow}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{1}\hlstd{,}\hlnum{2}\hlstd{),} \hlkwc{mar}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{1}\hlstd{,}\hlnum{4}\hlstd{,}\hlnum{1}\hlstd{,}\hlnum{6}\hlstd{))}
\hlkwd{spatialPlot1}\hlstd{(spat.fixed} \hlopt{+} \hlstd{spat.random,} \hlkwc{axis.args}\hlstd{=}\hlkwd{list}\hlstd{(}\hlkwc{cex.axis}\hlstd{=}\hlnum{0.75}\hlstd{))}
\end{alltt}
{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in which(mesh\$loc[, 1] >= 0 \& mesh\$loc[, 1] <= 47 \& mesh\$loc[, 2] >= : object 'mesh' not found}}\begin{alltt}
\hlkwd{spatialPlot1}\hlstd{(spat.random,} \hlkwc{axis.args}\hlstd{=}\hlkwd{list}\hlstd{(}\hlkwc{cex.axis}\hlstd{=}\hlnum{0.75}\hlstd{))}
\end{alltt}
{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in which(mesh\$loc[, 1] >= 0 \& mesh\$loc[, 1] <= 47 \& mesh\$loc[, 2] >= : object 'mesh' not found}}\end{kframe}
\end{knitrout}
To view the spatial effect on a passing hazard (for instance, to player 1---the point guard), we would do:
\begin{knitrout}\footnotesize
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{load}\hlstd{(}\hlkwd{sprintf}\hlstd{(}\hlstr{"%s/INLA_PASS1.Rdata"}\hlstd{, data.dir))}
\hlstd{vars} \hlkwb{<-} \hlkwd{paste0}\hlstd{(}\hlstr{"b"}\hlstd{,} \hlkwd{seq}\hlstd{(n.basis))}
\end{alltt}
{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in seq(n.basis): object 'n.basis' not found}}\begin{alltt}
\hlstd{spat.fixed} \hlkwb{<-} \hlkwd{as.numeric}\hlstd{(inla.out}\hlopt{$}\hlstd{summary.fixed[}\hlstr{"(Intercept)"}\hlstd{,} \hlstr{"mean"}\hlstd{]} \hlopt{+}
\hlkwd{t}\hlstd{(pass1.basis)} \hlopt{%*%} \hlstd{inla.out}\hlopt{$}\hlstd{summary.fixed[vars,} \hlstr{"mean"}\hlstd{])}
\end{alltt}
{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in t(pass1.basis): object 'pass1.basis' not found}}\begin{alltt}
\hlstd{spat.random} \hlkwb{<-} \hlkwd{as.numeric}\hlstd{(inla.out}\hlopt{$}\hlstd{summary.random}\hlopt{$}\hlstd{p.int[this.player,} \hlstr{"mean"}\hlstd{]} \hlopt{+}
\hlkwd{t}\hlstd{(pass1.basis)} \hlopt{%*%} \hlstd{inla.out}\hlopt{$}\hlstd{summary.random}\hlopt{$}\hlstd{p.int[this.player} \hlopt{+} \hlstd{n} \hlopt{*} \hlstd{(}\hlnum{1}\hlopt{:}\hlstd{n.basis),} \hlstr{"mean"}\hlstd{])}
\end{alltt}
{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in t(pass1.basis): object 'pass1.basis' not found}}\begin{alltt}
\hlkwd{par}\hlstd{(}\hlkwc{mfrow}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{1}\hlstd{,}\hlnum{2}\hlstd{),} \hlkwc{mar}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{1}\hlstd{,}\hlnum{4}\hlstd{,}\hlnum{1}\hlstd{,}\hlnum{6}\hlstd{))}
\hlkwd{spatialPlot2}\hlstd{(}\hlkwd{head}\hlstd{(spat.fixed} \hlopt{+} \hlstd{spat.random, mesh}\hlopt{$}\hlstd{n),}
\hlkwd{tail}\hlstd{(spat.fixed} \hlopt{+} \hlstd{spat.random, mesh}\hlopt{$}\hlstd{n),}
\hlkwc{axis.args}\hlstd{=}\hlkwd{list}\hlstd{(}\hlkwc{cex.axis}\hlstd{=}\hlnum{0.75}\hlstd{))}
\end{alltt}
{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in which(mesh\$loc[, 1] >= 0 \& mesh\$loc[, 1] <= 47 \& mesh\$loc[, 2] >= : object 'mesh' not found}}\end{kframe}
\end{knitrout}
Lastly, it's useful to check the hyperparameter estimates to make sure they are sensible. The hyperparameters for the macrotransition entry models (and shot probability model) are log precision terms for the CAR model, described in Sections 4.1 and 4.2 of the paper. In this implementation, we've fixed the hyperparameters for all spatial basis loadings to be the same within each macrotransition entry model.
\begin{knitrout}\footnotesize
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{inla.out}\hlopt{$}\hlstd{mode}\hlopt{$}\hlstd{theta} \hlcom{# parameter values}
\end{alltt}
\begin{verbatim}
## [1] -2.36010142 -0.04486083 0.53744864 4.00845149 -0.12840774 0.46386675 0.88176478 -0.21073187
\end{verbatim}
\begin{alltt}
\hlstd{inla.out}\hlopt{$}\hlstd{mode}\hlopt{$}\hlstd{theta.tags} \hlcom{# parameter names}
\end{alltt}
\begin{verbatim}
## [1] "Log precision for p.int" "Log precision for p.dribble"
## [3] "Log precision for p.ndef" "Log precision for p.ball.lastsec"
## [5] "Log precision for p.doff1" "Log precision for p.doff2"
## [7] "Log precision for p.doff3" "Log precision for p.ddef"
\end{verbatim}
\end{kframe}
\end{knitrout}
\subsection{Transition probability matrices}
The last model component needed to calculate EPV is the set of transition probability matrices for $C_t$, described in Section 3.4 of the paper. We load these---for instance, for Dwyane Wade---by running:
\begin{knitrout}\footnotesize
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{player.id} \hlkwb{<-} \hlstd{players}\hlopt{$}\hlstd{player_id[}\hlkwd{grep}\hlstd{(}\hlstr{"Wade"}\hlstd{, players}\hlopt{$}\hlstd{lastname)]}
\hlkwd{load}\hlstd{(}\hlkwd{sprintf}\hlstd{(}\hlstr{"%s/tmats/%s.Rdata"}\hlstd{, data.dir, player.id))}
\hlkwd{names}\hlstd{(tmat.ind)}
\end{alltt}
\begin{verbatim}
## [1] "micros" "passes1" "passes2" "passes3" "passes4" "absorbs"
\end{verbatim}
\end{kframe}
\end{knitrout}
\texttt{tmat.ind} is a list with each element representing blocks (sub-matrices) of $\tilde{\mathbf{N}}$, the transition count matrix for $C_t$ given the players on the court (see Section 3.4 of the paper). The rows in each block represent the 14 \{\texttt{region}\} $\times$ \{defended\} states we use in $C_t$ for a given ballcarrier, as explained in Section 2.2 of the paper. Columns in these blocks also represent such states, except for the \texttt{absorbs} block, where columns represent absorbing states in $\mathcal{C}_{\text{end}}$. Depending on the lineup used, different blocks will be used to construct $\mathbf{P}$. Also note, the \texttt{tmat.pos} object contains blocks used in calculating EPV-Added, as discussed in Section A.4 of the paper.
\section{Calculating EPV}
\subsection{Coarsened state expected point values}
Given estimates of our parameters, EPV is calculated using Monte Carlo. The general idea, introduced in Section 3 of the paper, is to alternate draws from the micro- and macrotransition entry models until a macrotransition (pass, shot attempt, turnover) occurs. Then, given the predicted outcome of this macrotransition, we calculate EPV using the transition probability matrix of coarsened states. Before actually simulating EPV draws, it's useful to look at what the expected point values are of each coarsened state, as EPV will always be a weighted average of these values:
\begin{knitrout}\footnotesize
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{source}\hlstd{(}\hlkwd{sprintf}\hlstd{(}\hlstr{"%s/parameters.R"}\hlstd{, code.dir))}
\hlstd{hyper} \hlkwb{<-} \hlkwd{getHyperParams}\hlstd{(tdat)} \hlcom{# makes sure all parameter inference is loaded}
\end{alltt}
{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in getHyperParams(tdat): object 'inla.names' not found}}\begin{alltt}
\hlstd{ev.out} \hlkwb{<-} \hlkwd{evLineups}\hlstd{(tdat)} \hlcom{# coarsened state EVs for each offensive lineup in tdat}
\end{alltt}
{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in evLineups(tdat): object 'state\_nms' not found}}\end{kframe}
\end{knitrout}
% change "teammates.all" to "lineups"!
In \texttt{ev.out}, \texttt{teammates.all} is a matrix of 5-man lineups that appear in \texttt{tdat} (there may be duplicate rows). For instance, we have the starting 5 for the Miami Heat:
\begin{knitrout}\footnotesize
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{lineup.ids} \hlkwb{<-} \hlstd{ev.out}\hlopt{$}\hlstd{teammates.all[}\hlnum{2}\hlstd{, ]}
\end{alltt}
{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in eval(expr, envir, enclos): object 'ev.out' not found}}\begin{alltt}
\hlstd{this.lineup} \hlkwb{<-} \hlstd{players[}\hlkwd{match}\hlstd{(lineup.ids, players}\hlopt{$}\hlstd{player_id), ]}
\end{alltt}
{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in match(lineup.ids, players\$player\_id): object 'lineup.ids' not found}}\begin{alltt}
\hlstd{this.lineup[,} \hlnum{2}\hlopt{:}\hlnum{4}\hlstd{]}
\end{alltt}
{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in eval(expr, envir, enclos): object 'this.lineup' not found}}\end{kframe}
\end{knitrout}
For each 5-man lineup, there are $5 \times 2 \text{ (defended or not) } \times 7 \text{ (court regions) } = 70$ coarsened state expected values. To check these for LeBron James' possession states, for instance, we'd do:
\begin{footnotesize}
\begin{kframe}
\begin{alltt}
\hlstd{lineup.states} \hlkwb{<-} \hlkwd{paste}\hlstd{(}\hlkwd{rep}\hlstd{(this.lineup}\hlopt{$}\hlstd{lastname,} \hlkwc{each}\hlstd{=}\hlnum{14}\hlstd{), state_nms)} \hlcom{# state names}
\end{alltt}
{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in paste(rep(this.lineup\$lastname, each = 14), state\_nms): object 'this.lineup' not found}}\begin{alltt}
\hlkwd{xtable}\hlstd{(}\hlkwd{data.frame}\hlstd{(}\hlkwc{state}\hlstd{=lineup.states,} \hlkwc{EV}\hlstd{=ev.out}\hlopt{$}\hlstd{evs[[}\hlnum{2}\hlstd{]])[}\hlkwd{grep}\hlstd{(}\hlstr{"James"}\hlstd{, lineup.states), ],} \hlkwc{digits}\hlstd{=}\hlnum{2}\hlstd{)}
\end{alltt}
{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in data.frame(state = lineup.states, EV = ev.out\$evs[[2]]): object 'lineup.states' not found}}\end{kframe}
\end{footnotesize}
These results seem pretty sensible, as, for instance, EVs are uniformly higher for uncontested states, with the difference especially great within the restricted area (1.60 versus 1.42) and corner 3 (1.21 versus 1.09). Note that with different teammates, we would see slightly different EVs for these states.
\subsection{EPV curves}
As mentioned in the paper, given estimates of all parameter values, EPV is computed by Monte Carlo sampling from the multiresolution transition models. This is a computationally expensive procedure, dominated by computing spatial effects for every player-position update from the microtransition model and hazard calculation from the macrotransition entry/exit models. However, it is straightforward to sample multiple time points together.
To supply EPV curves for a full game, it's most efficient to draw a single EPV estimate for all time points in a game, and then parallelize this across multiple machines that don't need to share memory. The code executes an EPV draw at each time point for every offensive possession in our sample game:
\begin{knitrout}\footnotesize
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{source}\hlstd{(}\hlkwd{sprintf}\hlstd{(}\hlstr{"%s/EPV_calcs.R"}\hlstd{, code.dir))}
\hlstd{draw.raw} \hlkwb{<-} \hlkwd{multiresDraw}\hlstd{(tdat, hyper, def.micro, ev.out,} \hlkwc{nmic}\hlstd{=}\hlnum{50}\hlstd{,} \hlkwc{save.positions}\hlstd{=F)}
\hlstd{draw} \hlkwb{<-} \hlkwd{compressEPV}\hlstd{(tdat, draw.raw}\hlopt{$}\hlstd{fv.epv.list)}
\end{alltt}
\end{kframe}
\end{knitrout}
The \texttt{nmic} argument specifies 50 iterations (2 seconds) of the microtransition model, which is usually sufficient to observe $\tau_t$, a macrotransition entry. The \texttt{save.positions} argument stores the player-position innovations supplied by the micro model. These are necessary to reproduce Figure 7, which shows players' predicted motion paths, but necessitate lots of additional storage, as they essentially replicate the full positional data \texttt{nmic} times for each EPV draw.
We can load a pre-computed version of \texttt{draw}:
\begin{knitrout}\footnotesize
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{load}\hlstd{(}\hlkwd{sprintf}\hlstd{(}\hlstr{"%s/draw.Rdata"}\hlstd{, data.dir))}
\hlkwd{names}\hlstd{(draw)}
\end{alltt}
\begin{verbatim}
## [1] "epv" "probs" "vals" "probs.now" "vals.now"
\end{verbatim}
\end{kframe}
\end{knitrout}
Here, \texttt{epv} is a vector of EPV values corresponding to each row of \texttt{tdat}. \texttt{probs} is a data frame where each row gives the probabilities associated with each possible macrotransition event at time $t$ ($\mathbb{P}(C_{\delta_t} | \mathcal{F}^{(Z)}_t)$), and \texttt{vals} gives the associated expected point values conditional on these macrotransitions: $\mathbb{E}[X | C_{\delta_t}]$ (these probabilities/values are illustrated in Figure 7 of the paper). \texttt{probs} and \texttt{vals} contain an ``\texttt{other}'' state which represents no macrotransition occurring within the \texttt{50} simulated microtransitions. In this case, to calculate the expected value, we use the coarsened state expected value associated with the final microtransition draw, in this case $\mathbb{E}[X | C_{t + 2}]$. \texttt{probs.now} and \texttt{vals.now} are the instantaneous macrotransition probabilities and associated expected values.
For instance, during the first possession in this game, after Udonis Haslem brings the ball into the offensive halfcourt, we see the next action to most likely be a pass to Chalmers or Chris Bosh (the next play is a pass to Bosh). A shot attempt is extremely unlikely, and there is a 0.226 probability that Haslem will still possess the ball 2 seconds down the road. Among his passing options, James is the most valuable, though also the least likely to occur (James is near the basket, but the passing lane doesn't appear to be open).
\begin{knitrout}\footnotesize
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{transformed.data.plotter}\hlstd{(tdat,} \hlnum{30}\hlstd{)}
\end{alltt}
\end{kframe}\begin{figure}[H]
{\centering \includegraphics[width=\maxwidth]{figure/epv_example_1-1}
}
\caption[Udonis Haslem with ball possession]{Udonis Haslem with ball possession. His four passing options are 1: Mario Chalmers, 2: Dwyane Wade, 3: LeBron James, 4: Chris Bosh.}\label{fig:epv_example_1}
\end{figure}
\begin{kframe}\begin{alltt}
\hlstd{draw}\hlopt{$}\hlstd{probs[}\hlnum{30}\hlstd{, ]}
\end{alltt}
\begin{verbatim}
## pass1 pass2 pass3 pass4 make miss TO other
## 30 0.5747731 0.02065291 0.01900831 0.1448298 0.000981215 0.00157953 0.01181597 0.2263592
\end{verbatim}
\begin{alltt}
\hlstd{draw}\hlopt{$}\hlstd{vals[}\hlnum{30}\hlstd{, ]}
\end{alltt}
\begin{verbatim}
## pass1 pass2 pass3 pass4 make miss TO other
## 30 0.9676442 1.017705 1.105218 1.014151 3 0.15 0 0.9707326
\end{verbatim}
\end{kframe}
\end{knitrout}
Because each EPV draw executes independent multiresolution transition simulations for each time point $t$, the resultant EPV curve is not very smooth. For instance, at time $t$, we might simulate a player driving toward the basket and attempting a layup, whereas at time $t+\epsilon$ we simulate the same player passing to a teammate. We see this below:
\begin{knitrout}\footnotesize
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{plot}\hlstd{(}\hlnum{720} \hlopt{-} \hlstd{tdat}\hlopt{$}\hlstd{game_clock[}\hlnum{1}\hlopt{:}\hlnum{100}\hlstd{], draw}\hlopt{$}\hlstd{epv[}\hlnum{1}\hlopt{:}\hlnum{100}\hlstd{],} \hlkwc{xlab}\hlstd{=}\hlstr{"game clock"}\hlstd{,} \hlkwc{ylab}\hlstd{=}\hlstr{"EPV"}\hlstd{)}
\end{alltt}
\end{kframe}\begin{figure}[H]
{\centering \includegraphics[width=\maxwidth]{figure/epv_draw_plot-1}
}
\caption[EPV estimates of a single draw]{EPV estimates of a single draw}\label{fig:epv_draw_plot}
\end{figure}
\end{knitrout}
Of course, averaging over multiple EPV draws offers more smoothness---though by design, we see spikes in EPV exactly at moments when passes/shots/turnovers occur. The files \texttt{EPV\_draw.R} and \texttt{combine\_draws.R} execute independent EPV draws for this game on a computing cluster. We have combined 200 of these draws to obtain a final Monte Carlo EPV estimate (as well as Monte Carlo estimates of the transition probabilities and values). Below we load this, and merge these EPV estimates into the original full data set \texttt{dat}, where EPV is \texttt{NA} when the ball is not in the offensive halfcourt with the game clock moving. We also compute a ``smoothed EPV'' to (very slightly) interpolate the pointwise EPV estimates over time.
\begin{knitrout}\footnotesize
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{source}\hlstd{(}\hlkwd{sprintf}\hlstd{(}\hlstr{"%s/EPV_calcs.R"}\hlstd{, code.dir))}
\hlkwd{load}\hlstd{(}\hlkwd{sprintf}\hlstd{(}\hlstr{"%s/combined.epv.draws.Rdata"}\hlstd{, data.dir))}
\hlstd{e.dat} \hlkwb{<-} \hlkwd{combineDatEPV}\hlstd{(dat, epv.table)}
\end{alltt}
\end{kframe}
\end{knitrout}
We can now plot out EPV ``tickers'', as in Figure 2 of the paper:
\begin{knitrout}\footnotesize
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{par}\hlstd{(}\hlkwc{xpd}\hlstd{=}\hlnum{NA}\hlstd{,} \hlkwc{bty}\hlstd{=}\hlstr{"n"}\hlstd{,} \hlkwc{mfrow}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{1}\hlstd{,} \hlnum{2}\hlstd{))}
\hlstd{poss.1} \hlkwb{<-} \hlkwd{which}\hlstd{(e.dat}\hlopt{$}\hlstd{possID} \hlopt{==} \hlnum{1}\hlstd{)}
\hlkwd{plot}\hlstd{(}\hlnum{720} \hlopt{-} \hlstd{e.dat}\hlopt{$}\hlstd{game_clock[poss.1], e.dat}\hlopt{$}\hlstd{epv.smooth[poss.1],}
\hlkwc{xlab}\hlstd{=}\hlstr{"game clock"}\hlstd{,} \hlkwc{ylab}\hlstd{=}\hlstr{"EPV"}\hlstd{,} \hlkwc{type}\hlstd{=}\hlstr{"l"}\hlstd{,} \hlkwc{lwd}\hlstd{=}\hlnum{2}\hlstd{,} \hlkwc{ylim}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{.5}\hlstd{,} \hlnum{1.5}\hlstd{))}
\hlkwd{points}\hlstd{(}\hlnum{720} \hlopt{-} \hlstd{e.dat}\hlopt{$}\hlstd{game_clock[poss.1], e.dat}\hlopt{$}\hlstd{epv[poss.1],} \hlkwc{pch}\hlstd{=}\hlnum{20}\hlstd{,} \hlkwc{cex}\hlstd{=}\hlnum{0.5}\hlstd{)}
\hlstd{poss.90} \hlkwb{<-} \hlkwd{which}\hlstd{(e.dat}\hlopt{$}\hlstd{possID} \hlopt{==} \hlnum{90}\hlstd{)} \hlcom{# possession shown in paper}
\hlkwd{plot}\hlstd{(}\hlnum{720} \hlopt{-} \hlstd{e.dat}\hlopt{$}\hlstd{game_clock[poss.90], e.dat}\hlopt{$}\hlstd{epv.smooth[poss.90],}
\hlkwc{xlab}\hlstd{=}\hlstr{"game clock"}\hlstd{,} \hlkwc{ylab}\hlstd{=}\hlstr{"EPV"}\hlstd{,} \hlkwc{type}\hlstd{=}\hlstr{"l"}\hlstd{,} \hlkwc{lwd}\hlstd{=}\hlnum{2}\hlstd{,} \hlkwc{ylim}\hlstd{=}\hlkwd{c}\hlstd{(}\hlnum{.5}\hlstd{,} \hlnum{1.5}\hlstd{))}
\hlkwd{points}\hlstd{(}\hlnum{720} \hlopt{-} \hlstd{e.dat}\hlopt{$}\hlstd{game_clock[poss.90], e.dat}\hlopt{$}\hlstd{epv[poss.90],} \hlkwc{pch}\hlstd{=}\hlnum{20}\hlstd{,} \hlkwc{cex}\hlstd{=}\hlnum{0.5}\hlstd{)}
\end{alltt}
\end{kframe}\begin{figure}[H]
{\centering \includegraphics[width=\maxwidth]{figure/epv_ticker_1-1}
}
\caption[EPV curves for two possessions in this game]{EPV curves for two possessions in this game. The line slightly smooths the actual EPV values (dots).}\label{fig:epv_ticker_1}
\end{figure}
\end{knitrout}
One of the best ways to view EPV results is by generating gifs that show EPV curves side-by-side with the possession evolution. Below we've generated gifs for a pair of long and interesting-looking possessions (they're located in the \texttt{gifs} folder):
\begin{knitrout}\footnotesize
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlkwd{makeGIF}\hlstd{(e.dat,} \hlkwd{which}\hlstd{(e.dat}\hlopt{$}\hlstd{possID} \hlopt{==} \hlnum{12}\hlstd{),} \hlstr{"poss_12"}\hlstd{)} \hlcom{# takes a few minutes}
\hlkwd{makeGIF}\hlstd{(e.dat,} \hlkwd{which}\hlstd{(e.dat}\hlopt{$}\hlstd{possID} \hlopt{==} \hlnum{24}\hlstd{),} \hlstr{"poss_24"}\hlstd{)} \hlcom{# takes a few minutes}
\end{alltt}
\end{kframe}
\end{knitrout}
\subsection{Derived metrics}
The derived metrics presented in the paper, EPV-Added (EPVA) and shot satisfaction, are most meaningful when computed using a large sample of data, such as a full season. However, just as with any other basketball metric, we can calculate per-game versions of these statistics. For instance, to get these metrics for LeBron James and Deron Williams, we'd do:
\begin{knitrout}\footnotesize
\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe}
\begin{alltt}
\hlstd{id} \hlkwb{<-} \hlstd{players}\hlopt{$}\hlstd{player_id[}\hlkwd{grep}\hlstd{(}\hlstr{"LeBron"}\hlstd{, players}\hlopt{$}\hlstd{firstname)]}
\hlkwd{sum}\hlstd{(}\hlkwd{EPVA}\hlstd{(tdat, id))} \hlcom{# sums EPV added on each touch}
\end{alltt}
{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in getHyperParams(id.dat): object 'inla.names' not found}}\begin{alltt}
\hlkwd{mean}\hlstd{(}\hlkwd{shotSatis}\hlstd{(tdat, id))} \hlcom{# averages shot satisfaction of each touch}
\end{alltt}
{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in getHyperParams(id.dat): object 'inla.names' not found}}\begin{alltt}
\hlstd{id} \hlkwb{<-} \hlstd{players}\hlopt{$}\hlstd{player_id[}\hlkwd{grep}\hlstd{(}\hlstr{"Deron"}\hlstd{, players}\hlopt{$}\hlstd{firstname)]} \hlcom{# Deron Williams}
\hlkwd{sum}\hlstd{(}\hlkwd{EPVA}\hlstd{(tdat, id))} \hlcom{# sums EPV added on each touch}
\end{alltt}
{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in getHyperParams(id.dat): object 'inla.names' not found}}\begin{alltt}
\hlkwd{mean}\hlstd{(}\hlkwd{shotSatis}\hlstd{(tdat, id))} \hlcom{# averages shot satisfaction of each touch}
\end{alltt}
{\ttfamily\noindent\bfseries\color{errorcolor}{\#\# Error in getHyperParams(id.dat): object 'inla.names' not found}}\end{kframe}
\end{knitrout}
\section{Appendix}
Raw data \texttt{event\_id} codes:
\begin{table}[h!]
\begin{tabular}{lr|lr|lr|lr|lr}
\toprule
Event & ID & Event & ID & Event & ID & Event & ID & Event & ID \\
\midrule
FT Made & 1 & Def. Rebound & 6 & Timeout & 11 & Clock Sync & 16 & Dribble & 21 \\
FT Missed & 2 & Turnover & 7 & Jump Ball & 12 & Instant Replay & 17 & Pass & 22 \\
Shot Made & 3 & Foul & 8 & Ejection & 13 & Replay Ruling & 18 & Possession & 23 \\
Shot Missed & 4 & Violation & 9 & Start Period & 14 & Game Over & 19 & Shot Block & 24 \\
Off. Rebound & 5 &
Substitution & 10 &
End Period & 15 &
Stoppage & 20 &
Assist & 25 \\
\bottomrule
\end{tabular}
\caption{Glossary of \texttt{event\_id} codes in optical tracking data.}
\label{tab:event_codes}
\end{table}
\end{document}