* ---------------------------------------------------------------------------
* Stage 1: load the pitching file, estimate at-bats faced, aggregate the
* counting stats per pitcher/season/team/league, attach the batted-ball (BIP)
* files, harmonise a few team codes, and create the (empty) AL indicator.
* Bracketed names such as [your disk partition] are placeholders that must be
* replaced with real paths/filenames before running.
* ---------------------------------------------------------------------------
clear matrix
set matsize 800
set mem 500m
cd [your disk partition]
* filename contains a space, so it has to be quoted
use "lahman pitching"

* back out hitter ABs
drop if yearID < 1900
* 9.99 is recoded to missing before it is used as a divisor. float() on both
* sides makes the comparison succeed whether BAOpp is stored as float or
* double (a float-stored 9.99 is never equal to the double literal 9.99).
replace BAOpp = . if float(BAOpp) == float(9.99)
gen abf = H/BAOpp
drop if abf == .

// sum data per pitcher
collapse (sum) W L G GS CG SHO SV H ER HR BB SO IBB WP HBP BK BFP GF R GIDP IPouts abf, by(playerID yearID teamID lgID)

// IP
gen IP = IPouts/3

// import BIP data
* NOTE(review): m:m merges pair rows by position within each key group; after
* the collapse above playerID/yearID need not be unique in the master data.
* Confirm the BIP files are unique on playerID yearID (m:1 may be what is meant).
merge m:m playerID yearID using [1930_1979 BIP], update
drop _merge
merge m:m playerID yearID using [1980_1999 BIP], update
drop _merge
merge m:m playerID yearID using [1912_1929 BIP], update
drop _merge
merge m:m playerID yearID using [2000-2024 BIP], update replace
drop _merge

drop if IP < 100
drop if IP == .

* harmonise team codes across years
* (`year' is left as written; presumably it resolves to yearID by variable
* abbreviation -- confirm no separate variable named year exists)
replace teamID = "MIL" if teamID == "ML4" & year > 1997
replace teamID = "ML4" if teamID == "MIL" & year < 1998
replace teamID = "LAA" if teamID == "ANA" & year > 2004

* The original statement here had no key variables and no `using' keyword,
* which is not valid merge syntax.
* NOTE(review): playerID yearID is assumed because the other merges in this
* section use those keys -- confirm against the BBR 2024 file.
merge m:m playerID yearID using [BBR 2024 pitcher data], update
drop _merge
drop if yearID == .
drop if IP == .

// League id is not part of model but data retained for use in exploring alternative BIP_RBA models
gen AL = .
* ---------------------------------------------------------------------------
* Stage 2: fill the AL indicator (1 = AL, 0 = NL), back-fill blank lgID from
* it, then merge the SH/SF file and apply the sample filters.
* ---------------------------------------------------------------------------

* The recorded league always wins.
replace AL = 1 if lgID == "AL"
replace AL = 0 if lgID == "NL"

* Team-based fallback, applied ONLY where the league is still unknown.
* Previously these replaces were unconditional, so a team code listed here
* overwrote the value derived from lgID (e.g. every HOU row recorded as
* lgID == "NL" ended up with AL == 1 while lgID stayed "NL").
foreach t in OAK LAA MIN HOU CLE TBA KCA DET SEA TEX NYA BOS CHA TOR BAL {
    replace AL = 1 if teamID == "`t'" & missing(AL)
}
foreach t in CHN WAS SLN PIT NYN MIL PHI ATL COL LAN SDN ARI SFN MIA CIN {
    replace AL = 0 if teamID == "`t'" & missing(AL)
}

* back-fill blank league ids from the indicator
replace lgID = "AL" if AL == 1 & lgID == ""
replace lgID = "NL" if AL == 0 & lgID == ""

// merge bbref SH SF
merge m:m bbrefID yearID using [BBR SH/SF], update
drop _merge
drop if yearID == .

* for 2024 rows abf is rebuilt as hits plus outs recorded (IP*3)
replace abf = (H+(IP*3)) if year == 2024

* sample filters: at least 100 IP and non-missing key inputs
drop if IP < 100
drop if ifpop == .
drop if yearID == .
drop if SO == .
*** determine performance values as rates (some vestigial stuff here)
gen gb_pct_rs2 = GB/(GB+FB+LD+POP)
gen k9 = 9*SO/IP
gen bb9 = 9*BB/IP
gen hr9 = 9*HR/IP
gen hr_pip = HR/IP
gen k_pip = SO/IP
gen bb_pip = BB/IP
gen hbp_pip = HBP/IP
drop if IP < 100
gen rapg = 9*R/IP
gen obabip = (H-HR)/(abf-(SO+HR)+SF)
gen gb_pct = gb/(gb+airb)
gen ifgb_pip = ifgb/IP
gen ifpop_pip = ifpop/IP
gen airb_pip = airb/IP
gen ofair_pip = ofair/IP
gen bobw = ifpop_pip-ofair_pip
gen gb_pct2 = gb2/(gb2+airb2)
gen ifgb_pip2 = ifgb2/IP
gen ifpop_pip2 = ifpop2/IP
gen airb_pip2 = airb2/IP
gen ofair_pip2 = ofair2/IP
gen bobw2 = ifpop_pip2-ofair_pip2
gen ifair_pip = ifair/IP
* NOTE(review): every other "*2" rate uses the matching "*2" count; this one
* divides ifair (not ifair2). Left unchanged because ifair2 is not visible
* here -- confirm whether ifair2 was intended.
gen ifair_pip2 = ifair/IP

* pooled fit over all seasons (output only; nothing is stored from it)
regress rapg k_pip bb_pip hr_pip hbp_pip [iweight=IP]

// calculate season-specific FIPr
* fipr = fitted runs allowed per 9 IP from a separate IP-weighted regression
* on K, BB, HR and HBP per inning for each season (2020 is not in the list).
gen fipr = .
foreach y of numlist 1912/2019 2021 2022 2023 2024 {
    di "year `y'"
    regress rapg k_pip bb_pip hr_pip hbp_pip [iweight=IP] if year==`y'
    predict yhat
    replace fipr = yhat if year==`y'
    drop yhat
}

// merge team fielding scores
gen teamid = teamID
merge m:m teamid yearID using [team fielding data]
drop _merge

// define BIP-era periods
* Each period is labelled by its first season; the ranges must tile the
* years without overlap because later replaces overwrite earlier ones.
gen period = .
replace period = 1912 if yearID < 1923
replace period = 1923 if yearID >= 1923 & yearID < 1928
replace period = 1928 if yearID >= 1928 & yearID < 1935
replace period = 1935 if yearID >= 1935 & yearID < 1942
replace period = 1942 if yearID >= 1942 & yearID < 1955
replace period = 1955 if yearID >= 1955 & yearID < 1967
replace period = 1967 if yearID >= 1967 & yearID < 1975
replace period = 1975 if yearID >= 1975 & yearID < 1980
replace period = 1980 if yearID >= 1980 & yearID < 1990
replace period = 1990 if yearID >= 1990 & yearID < 1998
* lower bound was 1996, which overlapped the 1990 period and silently moved
* 1996-1997 into the 1998 period
replace period = 1998 if yearID >= 1998 & yearID < 2002
replace period = 2002 if yearID >= 2002 & yearID < 2012
replace period = 2012 if yearID >= 2012 & yearID < 2017
* missing values sort above every number, so guard the open-ended range
replace period = 2017 if yearID >= 2017 & !missing(yearID)
drop if period == .
// fit model with period-specific parameters
* one common intercept; a separate slope per period for each of the four
* regressors, with IP as probability weights
regress rapg i.period#c.(fipr field_composite ofair_pip ifpop_pip) [pweight=IP]

// save period parameters
* holders for the intercept and the four period-specific slopes
foreach holder in cons b_fipr b_field b_ifpop b_ofair {
    gen `holder' = .
}

* parallel lists: i-th holder variable <-> i-th regressor in the model
local holders b_fipr b_field b_ifpop b_ofair
local regressors fipr field_composite ifpop_pip ofair_pip

foreach p of numlist 1912 1923 1928 1935 1942 1955 1967 1975 1980 1990 1998 2002 2012 2017 {
    replace cons = _b[_cons] if period==`p'
    forvalues i = 1/4 {
        local holder : word `i' of `holders'
        local xvar : word `i' of `regressors'
        replace `holder' = _b[`p'.period#c.`xvar'] if period==`p'
    }
}

// save period-specific mean IF pop-up and OF airball rates
// weighted means for batted ball variables; maybe average should be for period and not season in question?...

* 1. total IP within each period (the weight denominator)
egen total_IP = total(IP), by(period)

* 2. IP-weighted period mean of ifpop_pip, then of ofair_pip:
*    sum(rate * IP) / sum(IP), computed within period
foreach rate in ifpop_pip ofair_pip {
    generate double weighted_`rate'_product = `rate' * IP
    egen double total_weighted_`rate' = total(weighted_`rate'_product), by(period)
    generate double wvar_`rate' = total_weighted_`rate' / total_IP
}

* reduce to one row per period and write the parameter file
collapse (mean) wvar_ifpop_pip wvar_ofair_pip cons b_fipr b_field b_ifpop b_ofair, by(period)
save [BIB_RBA parameters], replace