* =============================================================================
* Build team-season batting measures (SLG, OBP, OPS, standardized OPS) and
* compare how well each one tracks run scoring.
*
* Inputs (dataset names are placeholders; point them at your own files):
*   - Lahman team hitting data                   (master)
*   - team HBP/SH/SF totals summed from individual player records
*   - Baseball-Reference team hitting patch
*   - Baseball-Reference OPS+ data
* =============================================================================

clear
clear matrix
* NOTE(review): these two settings are believed to be ignored by recent Stata
* releases; they are kept for older installations.
set matsize 800
set mem 500m

* TODO: replace the placeholder with the project folder.
cd "[your directory]"

* Dataset names containing spaces must be quoted.
use "Lahman team hitting"

* Recode zero HBP/SF/SH as missing so that the "update" merge below can fill
* them in (presumably a zero here means "not recorded" -- confirm).
replace HBP = . if HBP == 0
replace SF  = . if SF == 0
replace SH  = . if SH == 0

* Merge HBP/SH/SF totals (derived from summing individual player totals).
merge 1:1 yearID teamID lgID using "lahman hbp sh sf data", update
drop _merge

* Keep only American League and National League team-seasons.
drop if lgID != "AL" & lgID != "NL"

* Rename hit variables.
* NOTE(review): B and S are presumably the doubles and triples columns --
* verify against the variable names in the master dataset.
rename B doub
rename S trip

* NOTE(review): Stata's documentation discourages merge m:m. If yearID teamID
* uniquely identify rows in both files, 1:1 would be the safer choice.
merge m:m yearID teamID using "bb ref team hitting patch", update
drop _merge

* Merge Baseball-Reference OPS+.
merge m:m yearID teamID using "bbref ops_plus coded data"
drop _merge

* Singles = hits that were not doubles, triples, or home runs. H counts every
* hit, so HR must be removed too; otherwise SLG counts each HR five times.
gen sing = H - (doub + trip + HR)

* Runs scored / allowed per 27 outs (IPouts is the outs denominator).
gen rpg  = (R / IPouts) * 27
gen rapg = (RA / IPouts) * 27

* Plate appearances. Hits are already part of AB and must not be added again.
gen PA = (AB + BB + HBP + SF + SH)

* Slugging percentage: total bases per at-bat.
gen slg = (sing + 2*doub + 3*trip + 4*HR) / AB

* On-base percentage.
gen obp = (H + BB + HBP) / (AB + BB + HBP + SF)

* OPS = OBP + SLG.
gen ops = obp + slg

* sops_2: standardized OPS, formed by adding within-season z-scores of SLG
* and OBP.
foreach v of varlist obp slg {
    bysort yearID: egen mean_`v' = mean(`v')
    bysort yearID: egen sd_`v'   = sd(`v')
    gen z_`v' = (`v' - mean_`v') / sd_`v'
}
gen sops_2 = z_slg + z_obp

* Cronbach's alpha for the two components of sops_2.
alpha z_slg z_obp

* Standardize each measure (and runs per game) within season.
foreach v of varlist ops_plus sops_2 rpg ops {
    bysort yearID: egen mean_`v' = mean(`v')
    bysort yearID: egen sd_`v'   = sd(`v')
    gen z_`v' = (`v' - mean_`v') / sd_`v'
}

* Pooled R-squared: standardized runs per game on each standardized measure.
regress z_rpg z_ops_plus
regress z_rpg z_ops
regress z_rpg z_sops_2

* Holder for the season-by-season R-squared of sops_2.
gen Rsops_2=.
* Holders for the season-by-season R-squared of OPS+ and raw OPS
* (Rsops_2 is created earlier in the script).
gen Rops_plus=.
gen Rops=.

* For every season, regress runs per game on each measure and store that
* season's R-squared on all of the season's rows.
* The season variable is referenced by its full name, yearID, instead of
* relying on variable abbreviation ("year").
forvalues yr = 1900/2024 {
    foreach m in ops_plus sops_2 ops {
        * A season with no (or too few) usable observations would otherwise
        * abort the do-file; skip it and leave R-squared missing.
        capture noisily regress rpg `m' if yearID == `yr'
        local rc = _rc
        if `rc' == 0 {
            replace R`m' = e(r2) if yearID == `yr'
        }
        else if !inlist(`rc', 2000, 2001) {
            * Anything other than "no/insufficient observations" is a real error.
            error `rc'
        }
    }
}

* Scale to percent of variance explained.
gen Rsops_2_100   = Rsops_2 * 100
gen Rops_plus_100 = Rops_plus * 100
gen Rops_100      = Rops * 100

* Smoothed season-by-season R-squared for the three measures.
* Each lpolyci supplies the shaded confidence band; the matching lpoly draws
* the dashed line on top of it.
* NOTE(review): each season's R-squared is repeated on every team row, so the
* smoother and its confidence bands treat those duplicates as independent
* points. Consider plotting one row per season -- confirm intent.
twoway ///
    (lpolyci Rsops_2_100 yearID, fcolor(gs14%80) lwidth(none) bwidth(10)) ///
    (lpoly Rsops_2_100 yearID, lcolor(black) lpattern(shortdash) lwidth(thin) bwidth(10)) ///
    (lpolyci Rops_plus_100 yearID, fcolor(green%20) lwidth(none) bwidth(10)) ///
    (lpoly Rops_plus_100 yearID, lcolor(green) lpattern(shortdash) lwidth(thin) bwidth(10)) ///
    (lpolyci Rops_100 yearID, fcolor(ltblue%80) lwidth(none) bwidth(10)) ///
    (lpoly Rops_100 yearID, lcolor(blue) lpattern(shortdash) lwidth(thin) bwidth(10)), ///
    ytitle("R-squared (scaled x100)") ///
    xtitle("Year") ///
    ylabel( 40 "40%" 50 "50%" 60 "60%" 70 "70%" 80 "80%" 90 "90%" 100 "100%", labsize(medium) nogrid) ///
    xlabel(1900(10)2024, angle(45) labsize(medium) nogrid) ///
    legend(off) ///
    graphregion(color(white)) ///
    plotregion(margin(zero))