clear clear matrix set matsize 800 set mem 500m cd your directior use lahman pitching drop if yearID <1900 // rename variables rename (B S) (doub trip) // repace missing hbp with individual batting data merge 1:1 yearID lgID teamID using "lahman_hbp_sf_sh_patch.dta", update drop _merge // merge 2024 season data from bb_reference merge m:m yearID teamID using bb ref 2024 team totals drop _merge // runs per game & runs allowed per game gen rpg=(R/IPouts)*27 gen rapg=(RA/IPouts)*27 // merge bbref team rfield merge m:m yearID teamID using "bb_ref_team_rfield" drop _merge replace rfield=rfield_br if rfield==. /// merge fielding bible drs rename teamID teamid merge m:m yearID teamid using fielding bible DRS data drop _merge // merge fangraphs defense coded merge m:m yearID teamid using fg fielding data drop _merge rename defense fgfield rename teamid teamID // merge BBREF FIP merge m:m yearID teamID using bb ref FIP data drop _merge drop if year <1900 // standardize rfield bysort yearID: egen mean_rfield = mean(rfield) bysort yearID: egen sd_rfield = sd(rfield) gen z_rfield = (rfield - mean_rfield) / sd_rfield // standardize drs and fgfield bysort yearID: egen mean_fgfield = mean(fgfield) bysort yearID: egen sd_fgfield = sd(fgfield) gen z_fgfield = (fgfield - mean_fgfield) / sd_fgfield bysort yearID: egen mean_fb_drs = mean(fb_drs) bysort yearID: egen sd_fb_drs = sd(fb_drs) gen z_fb_drs = (fb_drs - mean_fb_drs) / sd_fb_drs // standardize FIP bysort yearID: egen mean_FIP = mean(FIP) bysort yearID: egen sd_FIP = sd(FIP) gen z_FIP = (FIP - mean_FIP) / sd_FIP // standardize runs against per game and runs scored per game bysort yearID: egen mean_rapg = mean(rapg) bysort yearID: egen sd_rapg = sd(rapg) gen z_rapg = (rapg - mean_rapg) / sd_rapg bysort yearID: egen mean_rpg = mean(rpg) bysort yearID: egen sd_rpg = sd(rpg) gen z_rpg = (rpg - mean_rpg) / sd_rpg // multi-season regression models for TZR and DRS, fgfield hireg z_rapg (z_FIP) (z_rfield) if year < 2003 hireg z_rapg (z_FIP) (z_fgfield) if year <2003 hireg z_rapg (z_FIP) (z_rfield) if year > 2002 hireg z_rapg (z_FIP) (z_fgfield) if year > 2002 regress z_rapg z_FIP if year <1990 regress z_rapg z_FIP if year >2020 // individual season models ** season R^2s using BB ref gen R21 = . gen R22 = . foreach yr of numlist 1900/2024 { // Adjust the range to your data // Run the regression with z_FIP only for the current year regress RA FIP if year == `yr' // Store R2 from the first model in R21 for the current year local r2_1 = e(r2) replace R21 = `r2_1' if year == `yr' // Run the regression with z_FIP and z_rfield for the current year regress RA FIP rfield if year == `yr' // Store R2 from the second model in R22 for the current year local r2_2 = e(r2) replace R22 = `r2_2' if year == `yr' } preserve * Collapse to get mean R2 by year collapse (mean) R21 R22, by(year) * convert dataset to time series based on year sort year tsset year * * Multiply the y-axis variables by 100 for % scaling gen R21_scaled=R21*100 gen R22_scaled=R22*100 * Plot figure 1 * Generate smoothed values for R22 using lpoly with specified points lpoly R22_scaled year, gen(R22_smooth) bw(2) at(year) * Plot with gray-filled area for R22 and dashed line for R21 twoway (area R22_smooth year, color(gs12%50)) /// // Gray-filled area for smoothed R22 (lpoly R21_scaled year, lcolor(black) lpattern(longdash) lwidth(medium) bw(5)), /// // Black dashed lpoly line for smoothed R21 ytitle("R-squared (scaled x100)") /// xtitle("Year") /// ylabel(0(10)100, labsize(medium) nogrid) /// xlabel(1900(10)2023, angle(45) labsize(medium) nogrid) /// legend(off) /// graphregion(color(white)) /// plotregion(margin(zero)) restore drop R21 R22 ** season R^2s using BB ref pre 2003 & DRS post replace rfield=fb_drs if year >2002 gen R21 = . gen R22 = . foreach yr of numlist 1900/2024 { // // Run the regression with z_FIP only for the current year regress RA FIP if year == `yr' // Store R2 from the first model in R21 for the current year local r2_1 = e(r2) replace R21 = `r2_1' if year == `yr' // Run the regression with z_FIP and z_rfield for the current year regress RA FIP rfield if year == `yr' // Store R2 from the second model in R22 for the current year local r2_2 = e(r2) replace R22 = `r2_2' if year == `yr' } preserve * Collapse to get mean R2 by year collapse (mean) R21 R22, by(year) * convert dataset to time series based on year sort year tsset year * * Multiply the y-axis variables by 100 for % scaling gen R21_scaled=R21*100 gen R22_scaled=R22*100 * Plot figure 1 equiv using fielding bible post 2003 * Generate smoothed values for R22 using lpoly with specified points lpoly R22_scaled year, gen(R22_smooth) bw(2) at(year) * Plot with gray-filled area for R22 and dashed line for R21 twoway (area R22_smooth year, color(gs12%50)) /// // Gray-filled area for smoothed R22 (lpoly R21_scaled year, lcolor(black) lpattern(longdash) lwidth(medium) bw(5)), /// // Black dashed lpoly line for smoothed R21 ytitle("R-squared (scaled x100)") /// xtitle("Year") /// ylabel(0(10)100, labsize(medium) nogrid) /// xlabel(1900(10)2023, angle(45) labsize(medium) nogrid) /// legend(off) /// graphregion(color(white)) /// plotregion(margin(zero)) restore replace rfield=rfield_br if year >2002 ** season R^2s using fg replace rfield=fb_drs if year >2002 drop R21 R22 gen R21 = . gen R22 = . foreach yr of numlist 1900/2024 { // // Run the regression with z_FIP only for the current year regress RA FIP if year == `yr' // Store R2 from the first model in R21 for the current year local r2_1 = e(r2) replace R21 = `r2_1' if year == `yr' // Run the regression with z_FIP and z_fgfield for the current year regress RA FIP fgfield if year == `yr' // Store R2 from the second model in R22 for the current year local r2_2 = e(r2) replace R22 = `r2_2' if year == `yr' } preserve * Collapse to get mean R2 by year collapse (mean) R21 R22, by(year) * convert dataset to time series based on year sort year tsset year * * Multiply the y-axis variables by 100 for % scaling gen R21_scaled=R21*100 gen R22_scaled=R22*100 * Plot figure 1 equiv w/ FG data * Generate smoothed values for R22 using lpoly with specified points lpoly R22_scaled year, gen(R22_smooth) bw(2) at(year) * Plot with gray-filled area for R22 and dashed line for R21 twoway (area R22_smooth year, color(gs12%50)) /// // Gray-filled area for smoothed R22 (lpoly R21_scaled year, lcolor(black) lpattern(longdash) lwidth(medium) bw(5)), /// // Black dashed lpoly line for smoothed R21 ytitle("R-squared (scaled x100)") /// xtitle("Year") /// ylabel(0(10)100, labsize(medium) nogrid) /// xlabel(1900(10)2023, angle(45) labsize(medium) nogrid) /// legend(off) /// graphregion(color(white)) /// plotregion(margin(zero)) restore // models for ballpark estimates of diminution of fielding importance bef after K rates // increase hireg z_rapg (z_FIP) (z_rfield) if year >1954 & year < 2000 hireg z_rapg (z_FIP) (z_rfield) if year > 2002 // illustrate shrinking value of rfield_br *** save unit value of rfield for each season gen beta_rfield = . gen beta_drs = . gen beta_FIP=. foreach yr of numlist 1900/2024 { // Run the regression with FIP and rfield only for the current year quietly regress RA FIP rfield if year == `yr' // Store the betas in the current year local b_rfield = _b[rfield] local b_FIP = _b[FIP] // Replace stored betas with betas for observations in the current year replace beta_rfield = `b_rfield' if year == `yr' replace beta_FIP = `b_FIP' if year == `yr' } *** save unit value of drs for each season foreach yr of numlist 2003/2024 { // Run the regression with FIP and rfield only for the current year quietly regress RA FIP fb_drs if year == `yr' // Replace beta_ for observations in the current year replace beta_drs = _b[fb_drs] if year == `yr' } *** plot ** change sign of betas to reflect runs saved gen beta_rfield_neg= beta_rfield*-1 gen beta_drs_neg= beta_drs*-1 * adjust for 2021 short season replace beta_FIP=beta_FIP*2.7 if year ==2020 *** plot twoway (lpolyci beta_rfield_neg year if year >1950, clcolor(gs12%50) clstyle(none)) /// (lpoly beta_rfield_neg year if year >1950, lcolor(black) lpattern(dash)), /// ylabel(, nogrid) xlabel(1950(10)2020, angle(45) nogrid) /// title("") xtitle("") /// legend(off) twoway (lpolyci beta_FIP year if year >1950, clcolor(gs12%50) clstyle(none)) /// (lpoly beta_FIP year if year >1950, lcolor(black) lpattern(dash)), /// ylabel(, nogrid) xlabel(1950(10)2020, angle(45) nogrid) /// title("") xtitle("") /// legend(off) /// assess rinflation on Boyer, Beltre & areanado preserve collapse (mean) beta_rfield beta_drs ,by (year) drop if year <1955 merge m:m yearID using third base rfield data // calculate adjusted rfs (taking into account need to reverse signs of betas, which are valanced toward runs allowed, not saved) gen adj_br= beta_rfield*rfield*-1 gen adj_drs=beta_drs*drs*-1 collapse (sum) (rfield drs adj_br adj_drs), by(nameLast) gen br_inflation=adj_br-rfield gen drs_inflation=adj_drs-drs export excel using "[your directory]third_base_rfield_inflation.xls", firstrow(varlabels) replace restore *** how bad was Derek Jeter? preserve collapse (mean) beta_rfield beta_drs ,by (year) drop if year <2000 merge m:m yearID using jeter_rfield data gen dj_dra=0 forval yr = 2000/2014 { replace dj_dra = djrf * beta_rfield*-1 if year == `yr' } gen dj_br_inflation=dj_dra-djrf collapse (sum) djrf dj_dra dj_br_inflation export excel using "[your directory]\jeter_rfield_inflation.xls", firstrow(varlabels) replace restore /// Illustrate impact of shfting value of fielding and pitching for 1967 pennant race ** use clarify MC to compute difference in runs avoided for Red Sox and Twins '67 vs. '24 estsimp regress RA rfield FIP if year >1962 & year == 1967 setx mean simqi, fd(ev) changex(rfield 37 -28 FIP 3.5 2.90) drop b* estsimp regress RA rfield FIP if year >1962 & year == 2024 setx mean simqi, fd(ev) changex(rfield 37 -28 FIP 3.5 2.90) drop b* // compute contributions to team runs scored associated with OPSs *** calculate OPS *** generate SLG *** gen slg=((H-(doub+trip))+2*doub+3*trip+4*HR)/AB *** generate opb *** gen obp =(H+BB+HBP)/(AB+BB+HBP+SF) *** add for OPS gen ops=obp+slg *** use clarify MC to estimate differences in runs scored '67 vs '24 estsimp regress R ops if year ==1967 setx mean simqi, fd(ev) changex(ops .716 .695) drop b* estsimp regress R ops if year ==2024 setx mean simqi, fd(ev) changex(ops .716 .695) drop b*