// Session setup: start from an empty workspace and load the Lahman team file.
clear
	clear matrix
	// Legacy resource settings, kept as in the original script
	// (presumably only meaningful on older Stata releases -- confirm).
	set matsize 800
	set mem 500m
	// NOTE(review): machine-specific working directory; adjust before running elsewhere.
	cd "C:\Users\dmk38\Documents\x5\"
	// The dataset name contains spaces, so it must be quoted; unquoted, Stata
	// parses only the first word as the file name and the command fails.
	use "Lahman team pitching file"


	// Restrict the sample to American League and National League team-seasons;
	// every other league code (including an empty lgID) is removed.
	keep if inlist(lgID, "AL", "NL")


		
		// Merge FIP and the 2024 team data, keyed on season and team.
		// The using-file name contains spaces and therefore has to be quoted.
		// -update- lets values from the using file fill in missing values
		// in the data already in memory.
		// NOTE(review): Stata's documentation discourages m:m merges; if
		// yearID-teamID uniquely identifies rows on both sides, 1:1 would be
		// the safer choice -- confirm before changing.
		merge m:m yearID teamID using "bb ref 2024 patch", update

		// The match indicator is not used afterwards.
		drop _merge

	// Runs scored (R) and runs allowed (RA), each expressed per 27 outs,
	// using outs pitched (IPouts) as the denominator.
	generate rpg  = (R  / IPouts) * 27
	generate rapg = (RA / IPouts) * 27
	
	 
	 
	 ** standardize 


		// Within-season z-scores for runs allowed per game (rapg) and total
		// runs allowed (RA): subtract the season mean, divide by the season SD.
		foreach v of varlist rapg RA {
			bysort yearID: egen mean_`v' = mean(`v')
			bysort yearID: egen sd_`v' = sd(`v')
			generate z_`v' = (`v' - mean_`v') / sd_`v'
		}


		// Merge the Baseball-Reference fielding (rfield) data.
		// The key variable is renamed first because the merge below is keyed
		// on the lower-case name teamid.
		rename teamID teamid

		// The using-file name contains spaces and therefore has to be quoted.
		// -update- lets using-file values fill in missing values in memory.
		// NOTE(review): m:m merges are discouraged by Stata's documentation;
		// 1:1 is preferable if yearID-teamid is unique on both sides -- confirm.
		merge m:m yearID teamid using "bb ref rfield data", update

		// The match indicator is not used afterwards.
		drop _merge
		

		
		

			
			// Merge the FanGraphs pitching WAR data.
			// The using-file name contains spaces and therefore has to be quoted.
			// NOTE(review): m:m merges are discouraged by Stata's documentation;
			// 1:1 is preferable if yearID-teamid is unique on both sides -- confirm.
			merge m:m yearID teamid using "fg pitching war data"

			drop _merge

			// Discard rows left without a league code after the merge, and
			// everything before the 1900 season.
			drop if lgID == ""
			drop if yearID < 1900
		
	// Merge the Baseball-Reference pitching WAR data.
	// (A plain // comment is used here: /// is Stata's line-continuation
	// marker and would splice the following line onto this one.)
	// The using-file name contains spaces and therefore has to be quoted.
	// NOTE(review): m:m merges are discouraged by Stata's documentation;
	// 1:1 is preferable if yearID-teamid is unique on both sides -- confirm.
	merge m:m yearID teamid using "bb ref pitching war data"

	// The match indicator is not used afterwards.
	drop _merge

	
	// Within-season z-scores for fielding runs (rfield) and pitching WAR (pwar):
	// subtract the season mean, divide by the season standard deviation.
	foreach v of varlist rfield pwar {
		bysort yearID: egen mean_`v' = mean(`v')
		bysort yearID: egen sd_`v' = sd(`v')
		generate z_`v' = (`v' - mean_`v') / sd_`v'
	}


	
	// Pitching WAR per game played, then its within-season z-score.
	generate pwpg = pwar / G

	by yearID, sort: egen mean_pwpg = mean(pwpg)
	by yearID, sort: egen sd_pwpg = sd(pwpg)
	generate z_pwpg = (pwpg - mean_pwpg) / sd_pwpg
	
	
	
	// FanGraphs pitching WAR per game played.
	// NOTE(review): fgpwpg is not referenced again in the visible script; the
	// later models use fgpitch (the unscaled value) -- confirm that is intended.
	generate fgpwpg = fgpwar / G

	// Copy of FanGraphs pitching WAR under the name used by the models below.
	generate fgpitch = fgpwar

	// Within-season z-score of fgpitch.
	by yearID, sort: egen mean_fgpitch = mean(fgpitch)
	by yearID, sort: egen sd_fgpitch = sd(fgpitch)
	generate z_fgpitch = (fgpitch - mean_fgpitch) / sd_fgpitch
	
	// Within-season z-score of FIP: centre on the season mean and scale by
	// the season standard deviation.
	by yearID, sort: egen mean_FIP = mean(FIP)
	by yearID, sort: egen sd_FIP = sd(FIP)
	generate z_FIP = (FIP - mean_FIP) / sd_FIP
	
	



// Examine the relative explanatory power of the pitching-proficiency metrics
// over the modern era.
//
// For each season 1900-2024:
//   Rrfield      = R2 of rapg regressed on rfield alone (baseline)
//   Ri_<metric>  = R2 of rapg on <metric> + rfield, minus the baseline R2
// Each value is written to every row belonging to that season.

generate Rrfield = .
generate Ri_FIP = .
generate Ri_pwpg = .
generate Ri_fgpitch = .

forvalues season = 1900/2024 {
    // Baseline model: fielding only
    regress rapg rfield if year == `season'
    local base_r2 = e(r2)
    replace Rrfield = `base_r2' if year == `season'

    // Add one pitching metric at a time and record the gain in R2
    foreach metric in FIP pwpg fgpitch {
        regress rapg `metric' rfield if year == `season'
        local full_r2 = e(r2)
        replace Ri_`metric' = `full_r2' - Rrfield if year == `season'
    }
}
	
	// Express each incremental R2 as a percentage of variance explained.
	foreach metric in FIP pwpg fgpitch {
		generate Ri_`metric'_100 = Ri_`metric' * 100
	}
	
	
	// Smoothed incremental R2 (in percent) by season for the three metrics.
	// Each metric is drawn twice: an lpolyci layer with its outline
	// suppressed, then an lpoly layer as a thin black short-dashed line.
	// Layer order: fgpitch, FIP, pwpg. Both layers use bandwidth 10.
	local band lwidth(none) bwidth(10)
	local curve lcolor(black) lpattern(shortdash) lwidth(thin) bwidth(10)

	twoway ///
		(lpolyci Ri_fgpitch_100 year, `band') ///
		(lpoly Ri_fgpitch_100 year, `curve') ///
		(lpolyci Ri_FIP_100 year, `band') ///
		(lpoly Ri_FIP_100 year, `curve') ///
		(lpolyci Ri_pwpg_100 year, `band') ///
		(lpoly Ri_pwpg_100 year, `curve'), ///
		ytitle("R-squared (scaled x100)") ///
		xtitle("Year") ///
		ylabel(0 "0%" 10 "10%" 20 "20%" 30 "30%" 40 "40%" 50 "50%" 60 "60%" 70 "70%" 80 "80%" 90 "90%" 100 "100%", labsize(medium) nogrid) ///
		xlabel(1900(10)2024, angle(45) labsize(medium) nogrid) ///
		legend(off) ///
		graphregion(color(white)) ///
		plotregion(margin(zero))

	   
// Examine the impact of variance in each metric on expected runs allowed
// for select seasons.

// Change the sign of z_FIP so that it points the same way as the WAR measures.
replace z_FIP = -z_FIP

// Placeholders that the simulation loops fill in, one per metric.
foreach holder in fip_ev fg_ev pw_ev {
	generate `holder' = .
}

// Horizontal axis for the plots: 41 points from -1 to 1 in steps of .05,
// stored in the first 41 rows of the dataset.
generate pr_axis = _n*.05 - 1.05 in 1/41
	   
	// Clarify Monte Carlo simulations for seasons 1900-1920
	// (pick whatever seasons you want to compare by editing the -if- range).
	//
	// For each pitching metric, z_RA is regressed on rfield and the metric,
	// rfield is held at its mean, and the metric is stepped from -1 to 1 in
	// increments of .05. Row n of the metric's *_ev variable receives row n of
	// the simulated expected-value variable produced at the n-th grid point.
	//
	// The grid is indexed by an integer (n = 1..41) so that it lines up
	// exactly with the 41 rows of pr_axis; stepping a macro by .05 up to 1.05
	// could run a 42nd iteration and accumulate rounding error.
	//
	// NOTE(review): "replace ... = pi in n" stores a single simulated draw
	// per grid point rather than a summary of pi -- confirm this is intended.
	// NOTE(review): "drop b*" removes every variable whose name starts with
	// b, not only the simulated parameters created by estsimp -- confirm no
	// other such variables exist.
	local sim_metrics z_pwpg z_fgpitch z_FIP
	local sim_targets pw_ev fg_ev fip_ev

	forvalues k = 1/3 {
		local metric : word `k' of `sim_metrics'
		local target : word `k' of `sim_targets'

		estsimp regress z_RA rfield `metric' if year > 1899 & year < 1921
		setx mean
		simqi, ev

		forvalues n = 1/41 {
			// n = 1 -> -1, n = 21 -> 0, n = 41 -> 1
			local a = (`n' - 21) / 20
			setx `metric' `a'
			simqi, ev genev(pi)
			replace `target' = pi in `n'
			drop pi
		}

		// Remove the simulated parameters before the next model is estimated.
		drop b*
	}



// Detailed summary of RA for 1900-1920.
// NOTE(review): presumably used to calibrate the runs labels on the y axis -- confirm.
summarize RA if year > 1899 & year < 1921, detail

// Linear fits with confidence bands of the simulated values against the
// metric grid: pwpg in blue, fgpitch in red, FIP in green.
// Fixes relative to the earlier version:
//   * acolor(green%25) is closed properly; xlabel() had been swallowed
//     inside the acolor() argument of the FIP layer.
//   * the positive tick .40 "40" replaces a duplicated -.40 "-40".
// Graph-level options (xlabel, ylabel, legend) are stated once at the end.
// NOTE(review): the -.40/-.24 tick positions are not proportional to the
// -.56 = -60 mapping (that would give about -.37 and -.19) -- TODO confirm.
twoway ///
	(lfitci pw_ev pr_axis, lcolor(blue) lpattern(dash) acolor(blue%50) alcolor(none)) ///
	(lfitci fg_ev pr_axis, lcolor(red) lpattern(dash) acolor(red%25) alcolor(none)) ///
	(lfitci fip_ev pr_axis, lcolor(green) lpattern(dash) acolor(green%25) alcolor(none)), ///
	xlabel(, nogrid) ///
	ylabel(-.56 "-60" -.40 "-40" -.24 "-20" 0 .24 "20" .40 "40" .56 "60", grid) ///
	legend(off)



// Clarify Monte Carlo simulations for seasons 1946-1955
// (pick whatever seasons you want to compare by editing the -if- range).
//
// For each pitching metric, z_RA is regressed on rfield and the metric,
// rfield is held at its mean, and the metric is stepped from -1 to 1 in
// increments of .05. Row n of the metric's *_ev variable receives row n of
// the simulated expected-value variable produced at the n-th grid point.
//
// The grid is indexed by an integer (n = 1..41) so that it lines up exactly
// with the 41 rows of pr_axis; stepping a macro by .05 up to 1.05 could run
// a 42nd iteration and accumulate rounding error.
//
// NOTE(review): "replace ... = pi in n" stores a single simulated draw per
// grid point rather than a summary of pi -- confirm this is intended.
// NOTE(review): "drop b*" removes every variable whose name starts with b,
// not only the simulated parameters created by estsimp -- confirm no other
// such variables exist.
local sim_metrics z_pwpg z_fgpitch z_FIP
local sim_targets pw_ev fg_ev fip_ev

forvalues k = 1/3 {
	local metric : word `k' of `sim_metrics'
	local target : word `k' of `sim_targets'

	estsimp regress z_RA rfield `metric' if year > 1945 & year < 1956
	setx mean
	simqi, ev

	forvalues n = 1/41 {
		// n = 1 -> -1, n = 21 -> 0, n = 41 -> 1
		local a = (`n' - 21) / 20
		setx `metric' `a'
		simqi, ev genev(pi)
		replace `target' = pi in `n'
		drop pi
	}

	// Remove the simulated parameters before the next model is estimated.
	drop b*
}



// Detailed summary of RA for 1946-1955.
// NOTE(review): presumably used to calibrate the runs labels on the y axis -- confirm.
summarize RA if year > 1945 & year < 1956, detail

// Linear fits with confidence bands of the simulated values against the
// metric grid: pwpg in blue, fgpitch in red, FIP in green.
// acolor(green%25) is now closed properly; xlabel() had been swallowed inside
// the acolor() argument of the FIP layer. Graph-level options (xlabel,
// ylabel, legend) are stated once at the end.
twoway ///
	(lfitci pw_ev pr_axis, lcolor(blue) lpattern(dash) acolor(blue%50) alcolor(none)) ///
	(lfitci fg_ev pr_axis, lcolor(red) lpattern(dash) acolor(red%25) alcolor(none)) ///
	(lfitci fip_ev pr_axis, lcolor(green) lpattern(dash) acolor(green%25) alcolor(none)), ///
	xlabel(, nogrid) ///
	ylabel(-.65 "-60" -.43 "-40" -.22 "-20" 0 .22 "20" .43 "40" .65 "60", grid) ///
	legend(off)






// Clarify Monte Carlo simulations for seasons 1960-1980
// (pick whatever seasons you want to compare by editing the -if- range).
//
// For each pitching metric, z_RA is regressed on rfield and the metric,
// rfield is held at its mean, and the metric is stepped from -1 to 1 in
// increments of .05. Row n of the metric's *_ev variable receives row n of
// the simulated expected-value variable produced at the n-th grid point.
//
// The grid is indexed by an integer (n = 1..41) so that it lines up exactly
// with the 41 rows of pr_axis; stepping a macro by .05 up to 1.05 could run
// a 42nd iteration and accumulate rounding error.
//
// NOTE(review): "replace ... = pi in n" stores a single simulated draw per
// grid point rather than a summary of pi -- confirm this is intended.
// NOTE(review): "drop b*" removes every variable whose name starts with b,
// not only the simulated parameters created by estsimp -- confirm no other
// such variables exist.
local sim_metrics z_pwpg z_fgpitch z_FIP
local sim_targets pw_ev fg_ev fip_ev

forvalues k = 1/3 {
	local metric : word `k' of `sim_metrics'
	local target : word `k' of `sim_targets'

	estsimp regress z_RA rfield `metric' if year > 1959 & year < 1981
	setx mean
	simqi, ev

	forvalues n = 1/41 {
		// n = 1 -> -1, n = 21 -> 0, n = 41 -> 1
		local a = (`n' - 21) / 20
		setx `metric' `a'
		simqi, ev genev(pi)
		replace `target' = pi in `n'
		drop pi
	}

	// Remove the simulated parameters before the next model is estimated.
	drop b*
}



// Detailed summary of RA for 1960-1980.
// NOTE(review): presumably used to calibrate the runs labels on the y axis -- confirm.
summarize RA if year > 1959 & year < 1981, detail

// Linear fits with confidence bands of the simulated values against the
// metric grid: pwpg in blue, fgpitch in red, FIP in green.
// acolor(green%25) is now closed properly; xlabel() had been swallowed inside
// the acolor() argument of the FIP layer. Graph-level options (xlabel,
// ylabel, legend) are stated once at the end.
twoway ///
	(lfitci pw_ev pr_axis, lcolor(blue) lpattern(dash) acolor(blue%50) alcolor(none)) ///
	(lfitci fg_ev pr_axis, lcolor(red) lpattern(dash) acolor(red%25) alcolor(none)) ///
	(lfitci fip_ev pr_axis, lcolor(green) lpattern(dash) acolor(green%25) alcolor(none)), ///
	xlabel(, nogrid) ///
	ylabel(-.73 "-60" -.49 "-40" -.24 "-20" 0 .24 "20" .49 "40" .73 "60", grid) ///
	legend(off)






// Clarify Monte Carlo simulations for seasons 2000-2009
// (pick whatever seasons you want to compare by editing the -if- range).
//
// All three models now share one season window. Previously the z_pwpg model
// was fitted on 2010-2018 while the z_fgpitch and z_FIP models (and the RA
// summary that follows) used 2000-2009, so the curves drawn on the same
// graph came from different eras.
// NOTE(review): 2000-2009 is chosen because the other models and the
// summary use it -- confirm this is the intended era.
//
// For each pitching metric, z_RA is regressed on rfield and the metric,
// rfield is held at its mean, and the metric is stepped from -1 to 1 in
// increments of .05. Row n of the metric's *_ev variable receives row n of
// the simulated expected-value variable produced at the n-th grid point.
//
// The grid is indexed by an integer (n = 1..41) so that it lines up exactly
// with the 41 rows of pr_axis; stepping a macro by .05 up to 1.05 could run
// a 42nd iteration and accumulate rounding error.
//
// NOTE(review): "replace ... = pi in n" stores a single simulated draw per
// grid point rather than a summary of pi -- confirm this is intended.
// NOTE(review): "drop b*" removes every variable whose name starts with b,
// not only the simulated parameters created by estsimp -- confirm no other
// such variables exist.
local sim_metrics z_pwpg z_fgpitch z_FIP
local sim_targets pw_ev fg_ev fip_ev

forvalues k = 1/3 {
	local metric : word `k' of `sim_metrics'
	local target : word `k' of `sim_targets'

	estsimp regress z_RA rfield `metric' if year > 1999 & year < 2010
	setx mean
	simqi, ev

	forvalues n = 1/41 {
		// n = 1 -> -1, n = 21 -> 0, n = 41 -> 1
		local a = (`n' - 21) / 20
		setx `metric' `a'
		simqi, ev genev(pi)
		replace `target' = pi in `n'
		drop pi
	}

	// Remove the simulated parameters before the next model is estimated.
	drop b*
}



// Detailed summary of RA for 2000-2009.
// NOTE(review): presumably used to calibrate the runs labels on the y axis -- confirm.
summarize RA if year > 1999 & year < 2010, detail

// Linear fits with confidence bands of the simulated values against the
// metric grid: pwpg in blue, fgpitch in red, FIP in green.
// acolor(green%25) is now closed properly; xlabel() had been swallowed inside
// the acolor() argument of the FIP layer. Graph-level options (xlabel,
// ylabel, legend) are stated once at the end.
// NOTE(review): the tick positions are identical to those of the 1960-1980
// graph -- TODO confirm they were recalibrated for this era.
twoway ///
	(lfitci pw_ev pr_axis, lcolor(blue) lpattern(dash) acolor(blue%50) alcolor(none)) ///
	(lfitci fg_ev pr_axis, lcolor(red) lpattern(dash) acolor(red%25) alcolor(none)) ///
	(lfitci fip_ev pr_axis, lcolor(green) lpattern(dash) acolor(green%25) alcolor(none)), ///
	xlabel(, nogrid) ///
	ylabel(-.73 "-60" -.49 "-40" -.24 "-20" 0 .24 "20" .49 "40" .73 "60", grid) ///
	legend(off)