-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathcompletion.R
More file actions
129 lines (103 loc) · 3.54 KB
/
completion.R
File metadata and controls
129 lines (103 loc) · 3.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
library(dplyr)
# ============================================================
# 1. LOAD DATA
# ============================================================
# Read each input and cast its identifier columns to character in the same
# step, so the join keys have the same type in both tables.
full <- mutate(
  read.csv("data/full_predictions.csv"),
  across(all_of(c("game_id", "play_id", "nfl_id")), as.character)
)
supp <- mutate(
  read.csv("data/supplementary_data.csv"),
  across(all_of(c("game_id", "play_id")), as.character)
)
# From the supplementary table only the join keys and the pass result are
# carried forward.
supp_small <- select(supp, game_id, play_id, pass_result)
# ============================================================
# 2. ADD PASS RESULT LABEL
# ============================================================
# left_join() appends the ".y" suffix to supp_small's pass_result only when
# `full` already has a column of the same name. Resolve the name the joined
# column will receive before joining, so the label step works in both cases
# instead of failing with "object 'pass_result.y' not found".
label_col <- if ("pass_result" %in% names(full)) {
  "pass_result.y"
} else {
  "pass_result"
}
full <- full %>%
  left_join(supp_small, by = c("game_id", "play_id"))
# Binary target: 1L when the joined pass result equals "C", 0L for any other
# value. Rows whose pass result is NA (e.g. no matching play in supp_small)
# keep an NA label.
full <- full %>%
  mutate(
    pass_result_bin = ifelse(.data[[label_col]] == "C", 1L, 0L)
  )
# ============================================================
# 3. EXTRACT WR-TARGETED FINAL PRE-CATCH FRAME
# ============================================================
# Keep rows where the player is the targeted receiver and is listed as WR,
# then reduce every (game_id, play_id) group to its highest-frame_id row.
wr_frames <- full %>%
  filter(
    player_role == "Targeted Receiver",
    player_position == "WR"
  ) %>%
  group_by(game_id, play_id) %>%
  # with_ties = FALSE guarantees exactly one row per play. The default
  # (with_ties = TRUE) returns every row tied for the maximum frame_id, so
  # duplicated last-frame rows would be counted more than once in the fit.
  slice_max(frame_id, n = 1, with_ties = FALSE) %>%
  ungroup()
# ============================================================
# 4. FEATURE ENGINEERING
# ============================================================
# Two features derived from the y coordinate:
#   dist_to_sideline - the smaller of y and 53.3 - y
#   is_slot          - 1L when y is strictly between 15 and 53.3 - 15,
#                      0L otherwise (NA when y is NA)
# NOTE(review): 53.3 is presumably the field width in yards - confirm.
wr_frames <- mutate(
  wr_frames,
  dist_to_sideline = pmin(y, 53.3 - y),
  is_slot = as.integer(y > 15 & y < (53.3 - 15))
)
# Predictor columns for the model. The order is significant: it fixes the
# column order of the model data and of the exported file.
tracking_features <- c(
  # columns describing the nearest defender
  "nearest_defender_dist", "nearest_def_s",
  # relative speed / direction and angle columns
  "rel_speed", "rel_dir", "defender_angle", "ball_angle",
  # per-player tracking columns
  "s", "a", "dx", "dy",
  # ball landing coordinates
  "ball_land_x", "ball_land_y",
  # features derived above
  "dist_to_sideline", "is_slot"
)
# ============================================================
# 5. REMOVE NA/INF IN FEATURES
# ============================================================
# Every non-finite entry (NA, NaN, Inf, -Inf) in a feature column is
# overwritten with 0. No rows are dropped.
zero_fill_non_finite <- function(values) {
  replace(values, !is.finite(values), 0)
}
wr_frames[tracking_features] <- lapply(
  wr_frames[tracking_features],
  zero_fill_non_finite
)
# ============================================================
# 6. FIT LOGISTIC MODEL (tracking only)
# ============================================================
# Fit a binomial GLM with pass_result_bin as the response. The model data
# is restricted to the response plus the columns named in
# tracking_features, so the `.` on the right-hand side expands to exactly
# those features.
# NOTE(review): under R's default na.action, rows with an NA label are
# presumably dropped from the fit - confirm this is intended.
catch_model <- glm(
pass_result_bin ~ .,
data = wr_frames[, c("pass_result_bin", tracking_features)],
family = binomial()
)
# Print coefficient estimates and fit diagnostics for inspection.
summary(catch_model)
# ============================================================
# 7. GENERATE CATCH PROBABILITIES FOR ALL WR FRAMES
# ============================================================
# Score every row of wr_frames with the fitted model. type = "response"
# returns predictions on the response (probability) scale rather than the
# link scale.
wr_frames[["catch_prob"]] <- predict(
  object = catch_model,
  newdata = wr_frames,
  type = "response"
)
# ============================================================
# 8. OUTPUT CLEAN CATCH PROBABILITY FILE
# ============================================================
# Exported columns, in order: identifiers, player descriptors, the observed
# label, the predicted probability, then every model feature.
keep_cols <- c(
  "game_id", "play_id", "nfl_id", "frame_id",
  "player_name", "player_position", "player_role", "player_side",
  "pass_result_bin", "catch_prob",
  tracking_features
)
wr_frames_clean <- wr_frames[keep_cols]
write.csv(
  x = wr_frames_clean,
  file = "data/catch_probability.csv",
  row.names = FALSE
)
cat("Catch probability file saved as data/catch_probability.csv\n")
# Compact table holding only the ID columns and the predicted probability.
catch_probs <- dplyr::select(wr_frames, game_id, play_id, nfl_id, catch_prob)