* ---------------------------------------------------------------------------
* Team-level offense: merge run-production / WAR measures, standardise them
* within season, derive a Pythagorean offensive wins-above-average (o_waa),
* and plot season-by-season R-squared of runs scored (r) on each measure.
*
* Bracketed names ([your partition], [team_ops_r_pa data], ...) are
* placeholders that must be replaced with real paths/file names before running.
* ---------------------------------------------------------------------------
clear
clear matrix
set matsize 800
set mem 500m

cd [your partition]

use [team_ops_r_pa data]

* NOTE(review): the merge keys below alternate between teamid, teamID and team.
* Stata variable names are case-sensitive, so each key must exist under exactly
* that name in both master and using data -- confirm against the files.
merge 1:1 teamid yearID using [bbref_team_rbat]  // bbr rbat team level
drop _merge
merge 1:1 teamid yearID using [fg_team_runs_produced]  // fg wRC and wRAA team level
drop _merge
merge 1:1 team yearID using [team_records data]
drop _merge
merge 1:1 teamID yearID using [team_run_allowed_diff]
drop _merge
* A blank is required before // for Stata to treat the rest as a comment.
merge 1:1 teamID yearID using [bbr_offwar]  // bbr team war components
drop _merge
rename offwar bbr_offwar
merge 1:1 teamID yearID using [fg_offwar]  // fg offensive war
drop _merge

* yearID is spelled out in full everywhere instead of relying on the
* abbreviation "year" resolving to it.
drop if yearID > 2024

merge 1:1 team yearID using [team_ops_estimated_run_produciton]
drop _merge

drop if r == .

gen rpg = r/g  // runs per game
* NOTE(review): rppa is created as all-missing and never filled in this script.
gen rppa = .
gen erpg = e_r/g

rename dWAR bbr_dwar
rename pwar bbr_pwar

* Within-season z-scores: for each measure create mean_<v>, sd_<v> and z_<v>,
* where mean and sd are taken over all teams in the same yearID.
foreach v in wrc ops bbr_offwar rbat wraa e_r rpg e_raa {
    bysort yearID: egen mean_`v' = mean(`v')
    bysort yearID: egen sd_`v' = sd(`v')
    gen z_`v' = (`v' - mean_`v') / sd_`v'
}

drop if yearID == 2025

**** derive OPS WAA ***

* League averages
bysort yearID: egen avg_er = mean(e_r)

* League run environment for exponent
* NOTE(review): pexp is computed here but never used below -- the win
* percentages use a fixed exponent of 2. Confirm which was intended.
bysort yearID: egen lg_rpg = mean((r + ra) / g)
gen pexp = lg_rpg^0.287

* Team's expected wins with actual offense
gen p_pct = (e_r^2) / (e_r^2 + ra^2)
gen p_wins = p_pct * g

* Expected wins with league average offense
gen pavg_pct = (avg_er^2) / (avg_er^2 + ra^2)
gen pavg_w = pavg_pct * g

* Offensive wins above average
gen o_waa = p_wins - pavg_w

* Labels
label var o_waa "Off WAA (Pyth)"

*** regressions

// runs per game on runs produced > avg
regress z_rpg z_rbat
regress z_rpg z_e_raa
regress z_rpg z_wraa

// runs per game on wins measures
regress z_rpg fg_offwar
regress z_rpg bbr_offwar
regress z_rpg o_waa

* --- figures ---------------------------------------------------------------

* Season-by-season R-squared of runs scored on each runs-above-average measure.
* Each R2_<tag> holds, on every row of a season, that season's e(r2).
gen R2_fg = .
gen R2_er = .
gen R2_brb = .

foreach yr of numlist 1900/2024 {  // Adjust the range to your data
    * Each spec is "<R2 suffix> <regressor>".
    foreach spec in "er e_raa" "fg wraa" "brb rbat" {
        local tag : word 1 of `spec'
        local xvar : word 2 of `spec'
        * capture: a season with no (r(2000)) or too few (r(2001)) observations
        * must not abort the loop; noisily keeps the regression output visible.
        capture noisily regress r `xvar' if yearID == `yr'
        local rc = _rc
        if `rc' == 0 {
            replace R2_`tag' = e(r2) if yearID == `yr'
        }
        else if !inlist(`rc', 2000, 2001) {
            * Anything other than an empty/too-small season is a real error.
            exit `rc'
        }
    }
}

preserve
keep yearID R2_er R2_brb R2_fg
gen R2_fg_100 = R2_fg*100
// gen R2_wr_100=R2_wr*100
gen R2_er_100 = R2_er*100
gen R2_brb_100 = R2_brb*100

* The last line of the graph command must NOT end in ///, otherwise the
* following -restore- is parsed as part of the twoway command.
twoway ///
    (lpoly R2_fg_100 yearID, lcolor(green) bwidth(5) lpattern(dash) ) ///
    (lpoly R2_brb_100 yearID, lcolor(red) bwidth(5) lpattern(dash) ) ///
    (lpoly R2_er_100 yearID, lcolor(blue) bwidth(5) lpattern(dash) ), ///
    ytitle("R-squared (scaled x100)") ///
    xtitle("Year") ///
    ylabel(40 "40%" 50 "50%" 60 "60%" 70 "70%" 80 "80%" 90 "90%" 100 "100%", labsize(medium) nogrid) ///
    xlabel(1900(10)2024, angle(45) labsize(medium) nogrid) ///
    legend(off) ///
    graphregion(color(white)) ///
    plotregion(margin(zero)) aspect(0.6)
restore

* Season-by-season R-squared of runs scored on each wins-based measure.
gen R2_fwar = .
gen R2_owaa = .
gen R2_bwar = .

foreach yr of numlist 1900/2024 {  // Adjust the range to your data
    foreach spec in "fwar fg_offwar" "owaa o_waa" "bwar bbr_offwar" {
        local tag : word 1 of `spec'
        local xvar : word 2 of `spec'
        capture noisily regress r `xvar' if yearID == `yr'
        local rc = _rc
        if `rc' == 0 {
            replace R2_`tag' = e(r2) if yearID == `yr'
        }
        else if !inlist(`rc', 2000, 2001) {
            exit `rc'
        }
    }
}

preserve
keep yearID R2_bwar R2_fwar R2_owaa R2_brb
gen R2_fwar_100 = R2_fwar*100
// gen R2_wr_100=R2_wr*100
gen R2_owaa_100 = R2_owaa*100
gen R2_bwar_100 = R2_bwar*100
gen R2_brb_100 = R2_brb*100

twoway ///
    (lpoly R2_fwar_100 yearID, lcolor(green) bwidth(5) lpattern(dash) ) ///
    (lpoly R2_bwar_100 yearID, lcolor(red) bwidth(5) lpattern(dash) ) ///
    (lpoly R2_owaa_100 yearID, lcolor(blue) bwidth(5) lpattern(dash) ), ///
    ytitle("R-squared (scaled x100)") ///
    xtitle("Year") ///
    ylabel(40 "40%" 50 "50%" 60 "60%" 70 "70%" 80 "80%" 90 "90%" 100 "100%", labsize(medium) nogrid) ///
    xlabel(1900(10)2024, angle(45) labsize(medium) nogrid) ///
    legend(off) ///
    graphregion(color(white)) ///
    plotregion(margin(zero)) aspect(0.6)
restore

* bbr rbat vs. bbr offensive WAR only.
preserve
keep yearID R2_bwar R2_brb
* The *_100 variables made inside the earlier preserve/restore blocks were
* discarded by -restore-, so they have to be rebuilt here before plotting.
gen R2_brb_100 = R2_brb*100
gen R2_bwar_100 = R2_bwar*100

twoway ///
    (lpoly R2_brb_100 yearID, lcolor(red) bwidth(5) lpattern(solid) ) ///
    (lpoly R2_bwar_100 yearID, lcolor(red) bwidth(5) lpattern(dash) ), ///
    ytitle("R-squared (scaled x100)") ///
    xtitle("Year") ///
    ylabel(40 "40%" 50 "50%" 60 "60%" 70 "70%" 80 "80%" 90 "90%" 100 "100%", labsize(medium) nogrid) ///
    xlabel(1900(10)2024, angle(45) labsize(medium) nogrid) ///
    legend(off) ///
    graphregion(color(white)) ///
    plotregion(margin(zero)) aspect(0.6)
restore