* ---------------------------------------------------------------------------
* Season and career run production from OPS and plate appearances.
*
* Reads a player-season file (uses the variables player, yearID, ops, pa),
* merges in per-year regression coefficients, and computes
*   rp / rp_aa    run production and run production above average, raw OPS
*   srp / srp_aa  the same quantities built from the PA-weighted z-score of OPS
* It then exports season and career rankings to Excel and plots the
* PA-weighted yearly means, SDs and coefficients of variation.
*
* The variables b_r, cons and b_z are never created in this script, so they
* are expected to arrive with the coefficient file -- confirm against that file.
* ---------------------------------------------------------------------------

clear
clear matrix
* NOTE: recent Stata releases ignore the next two settings; they are kept
* so the script still runs on older installations.
set matsize 800
set mem 500m
cd [your drive partition]
use [ops_PAs_1900_2024 data file]

** DQ steroid users -- or not, up to you! **
* The listed players are relabelled, not dropped: their seasons stay in the
* season ranking and they pool into one "STEROID DQ" row in the career collapse.
replace player = "STEROID DQ" if inlist(player, ///
    "Barry Bonds", "Sammy Sosa", "Manny Ramírez", ///
    "Álex Rodríguez", "Mark McGwire", "Jason Giambi", "Rafael Palmeiro")

*** OPS as a display string ***
* Three decimals, with the leading zero removed for values below 1
* (0.850 becomes .850). The test is made on the formatted text rather than
* on the numeric range, so an OPS that is exactly 1, or that rounds up to
* 1.000, keeps its leading 1 instead of being cut down to .000.
gen str7 ops_fmt = string(ops, "%6.3f")
replace ops_fmt = substr(ops_fmt, 2, .) if substr(ops_fmt, 1, 2) == "0."

*** import regression coefficients ***
* One coefficient row per year is a many-to-one lookup. m:1 gives the same
* result as the former m:m merge when yearID is unique in the using file and
* stops with an error, instead of pairing rows arbitrarily, when it is not.
merge m:1 yearID using [rppa_regression_coeffficients data file]
drop _merge

*** unstandardized seasons ***
gen rp = (ops*b_r + cons)*pa                    // run production
bysort yearID: egen mean_ops = mean(ops)        // unweighted yearly mean OPS
gen rp_aa = rp - ((mean_ops*b_r + cons)*pa)     // run production > avg

*** standardized seasons ***
* NOTE(review): year is used below while the merge and bysort use yearID.
* This relies either on a separate year variable or on Stata abbreviating
* year to yearID -- confirm which one applies to the input file.

* 1. Total plate appearances within each year (the weights' denominator)
egen total_pa = total(pa), by(year)

* Predicted runs per plate appearance
gen pr_rppa = rp/pa

* 2-5. PA-weighted yearly mean and population SD of ops and of pr_rppa.
* For each variable v this creates
*   wvar_v    PA-weighted mean (the wvar_ prefix is historical; it is a mean)
*   popvar_v  PA-weighted population variance (divides by the total weight)
*   wsd_v     PA-weighted population standard deviation
foreach v of varlist ops pr_rppa {
    tempvar wprod wsum wssd wssd_tot

    * 2. weighted mean = sum(v * pa) / sum(pa)
    gen double `wprod' = `v' * pa
    egen double `wsum' = total(`wprod'), by(year)
    gen double wvar_`v' = `wsum' / total_pa

    * 3. weighted sum of squared deviations from the weighted mean
    gen double `wssd' = (`v' - wvar_`v')^2 * pa
    egen double `wssd_tot' = total(`wssd'), by(year)

    * 4. population variance: divide by total weight
    gen double popvar_`v' = `wssd_tot' / total_pa

    * 5. population weighted standard deviation
    gen double wsd_`v' = sqrt(popvar_`v')

    drop `wprod' `wsum' `wssd' `wssd_tot'
}

* z-score of OPS against the PA-weighted yearly mean and SD
gen z_ops = (ops - wvar_ops)/wsd_ops

* NOTE(review): .042, .116 and .04 are hard-coded here and their source is
* not documented in this file -- confirm before changing the input data.
gen srp    = ((z_ops*b_z)*.042 + .116)*pa   // standardized run production
gen srp_aa = ((z_ops*b_z)*.042 + .04)*pa    // standardized run production > avg

*** season rankings: save ***
* Ranks are row positions after a descending sort (1 = highest).
gsort -srp_aa
gen s_rank = _n
gsort -rp_aa
gen raw_rank = _n
gen rpaa_100 = rp_aa/pa*100     // runs produced > avg per 100 PAs

export excel ///
    raw_rank s_rank player year ops_fmt rpaa_100 rp_aa srp_aa ///
    using "season_run_production.xlsx", ///
    firstrow(variables) replace

*** career runs produced ***
* Works on a collapsed copy; the season-level data come back at restore.
preserve
collapse (sum) pa rp_aa srp_aa, by(player)
rename rp_aa c_rp_aa
rename srp_aa c_srp_aa
gsort -c_srp_aa
gen c_s_rank = _n
gsort -c_rp_aa
gen c_raw_rank = _n
gen crpaa_100 = c_rp_aa/pa*100  // runs produced > avg per 100 PAs

export excel ///
    c_raw_rank c_s_rank player crpaa_100 c_rp_aa c_srp_aa ///
    using "career_run_production.xlsx", ///
    firstrow(variables) replace
restore

*** examining means & SDs ***
* Each graph gets its own name so that all six stay in memory; without a
* name every twoway call would overwrite the previous default graph.

* PA-weighted mean of predicted runs per PA
twoway ///
    (lpoly wvar_pr_rppa year, bw(5) lpattern(dash)) ///
    , ///
    ylabel(.1(.005).125, nogrid) xlabel(1900(10)2024, angle(45) nogrid) ///
    title("") legend(off) name(mean_pr_rppa, replace)

* PA-weighted SD of predicted runs per PA
twoway ///
    (lpoly wsd_pr_rppa year, bw(5) lpattern(dash)) ///
    , ///
    ylabel(, nogrid) xlabel(1900(10)2024, angle(45) nogrid) ///
    title("") legend(off) name(sd_pr_rppa, replace)

* Coefficient of variation of predicted runs per PA (a ratio, not a percent)
gen cv_pr_rppa = wsd_pr_rppa/wvar_pr_rppa
twoway ///
    (lpoly cv_pr_rppa year, bw(5) lpattern(dash)) ///
    , ///
    ylabel(, nogrid) xlabel(1900(10)2024, angle(45) nogrid) ///
    title("") legend(off) name(cv_pr_rppa, replace)

* PA-weighted mean of OPS
twoway ///
    (lpoly wvar_ops year, bw(5) lpattern(dash)) ///
    , ///
    ylabel(.6(.05).8, nogrid) xlabel(1900(10)2024, angle(45) nogrid) ///
    title("") legend(off) name(mean_ops, replace)

* PA-weighted SD of OPS
twoway ///
    (lpoly wsd_ops year, bw(5) lpattern(dash)) ///
    , ///
    ylabel(.10(.02).18, nogrid) xlabel(1900(10)2024, angle(45) nogrid) ///
    title("") legend(off) name(sd_ops, replace)

* Coefficient of variation of OPS, in percent
* NOTE(review): this one is multiplied by 100 while cv_pr_rppa is not --
* confirm whether the two are meant to be on different scales.
gen cv_ops = wsd_ops/wvar_ops*100
twoway ///
    (lpoly cv_ops year, bw(5) lpattern(dash)) ///
    , ///
    ylabel(, nogrid) xlabel(1900(10)2024, angle(45) nogrid) ///
    title("") legend(off) name(cv_ops, replace)