

*******************************************************
*ECDF
*******************************************************




* --- Observed pivotal ECDF p-values / z-scores, one row per manager ---
* NOTE(review): [DF_5] / [DF_4] look like placeholders for real dataset
* names — confirm before running.
use [DF_5],clear
* shift the replication index up by one (presumably reps were 0-based — TODO confirm)
replace rep=rep+1


* attach the per-manager observed statistic (xb) to every replication row
merge m:1 mgrID using [DF_4],nogen

gen  exceed=0 
replace exceed=1 if xb>xb_en
* NOTE(review): in Stata, missing sorts above any number, so xb>xb_en is TRUE
* when xb is missing — if xb can be missing this inflates sum_exceed; confirm.
bys mgrID: egen sum_exceed= total(exceed)
bys mgrID: egen R=max(rep)
* Jeffreys-style mid-p plotting position: strictly inside (0,1) even when
* sum_exceed is 0 or R
gen p_tile = (sum_exceed + 0.5) / (R + 1)






* collapse from rep-level to one row per manager
keep mgrID p_tile z_obs xb mgr_n

duplicates drop

sum p_tile,d


gen p_upper=1-p_tile

* two-sided p from the one-sided percentile
gen p_two = 2*min(p_tile, 1 - p_tile)



sum p_two,d

* convert the two-sided p back to a signed z; sign taken from z_obs
gen z_abs = invnormal(1 - p_two/2)
gen z_obs_p = z_abs
replace z_obs_p = -z_abs if z_obs < 0
drop z_abs   // optional

* round-trip sanity check: recover p_two from the signed z
gen p_check = 2*normal(-abs(z_obs_p))
drop if p_tile==.

save mwar_observed_pivotal.dta,replace

* counts beyond the conventional two-sided 5% critical value
count if z_obs_p>=1.96 
count if  z_obs_p<=-1.96





* --- Build null distribution of ECDF p's/z's (Jeffreys-style smoothing) ---
* NOTE(review): the -keep- earlier in this file drops xb_en and rep before this
* point, so the egen calls below would fail on the data as left in memory —
* confirm the intended input dataset for this section.
preserve
    bys mgrID: egen Rm = count(xb_en)    // replications per manager
    bys mgrID: egen rk = rank(xb_en)     // average ranks if ties

    * Jeffreys-style plotting position, strictly inside (0,1)
    gen double u = (rk - 0.5) / (Rm + 1)

    * two-sided p, bounded away from 0 so invnormal stays finite
    gen double p2 = 2*min(u, 1 - u)
    replace p2 = max(p2, 1e-12)

    * signed null z: negative in the lower tail (u < 0.5)
    gen double z_sim_abs = invnormal(1 - p2/2)
    gen double z_sim_p   = cond(u < 0.5, -z_sim_abs, z_sim_abs)
    drop z_sim_abs

    save mwar_en_null_pivotal.dta, replace
    use  mwar_en_null_pivotal.dta,clear

******************************
*Exceedance
******************************

    * tuning constants (previously hard-coded inline)
    local alpha 0.05     // per-test size defining the "tail"
    local Nmgr  512      // number of managers per replication
    * NOTE(review): the observed tail count appears as 55, 57, and 59 below
    * (display text, ==, >=, xline) — confirm the true K_obs and unify.

    gen e = p2 <= `alpha'
    bys rep: egen K_r = total(e)     // tail count per replication
    bys rep: keep if _n==1           // one row per replication

    quietly summarize K_r
    scalar E0K   = r(mean)
    scalar Rtot  = r(N)
    count if K_r >= 59
    * add-one smoothed Monte Carlo upper p-value.
    * BUGFIX: renamed from p_upper — a VARIABLE p_upper exists in this dataset,
    * and in Stata expressions variables shadow scalars, so the old -display-
    * showed the variable's first value, not this scalar.
    scalar p_upK = (1 + r(N)) / (Rtot + 1)
    _pctile K_r, p(5,50,95)

    display "K_obs=55 E0[K]=" %6.2f E0K "   p_upper=" %6.4f p_upK
    display "Null K (5/50/95%) = " r(r1) " / " r(r2) " / " r(r3)
	sum K_r,d
	count if K_r==57
	
	histogram K_r, discrete density


    * Empirical PMF from your reps
    contract K_r, freq(n)
    egen N = total(n)
    gen p_emp = n/N

    * Binomial(`Nmgr', `alpha') PMF via log-factorials (avoids overflow)
    gen lchoose = lnfactorial(`Nmgr') - lnfactorial(K_r) - lnfactorial(`Nmgr' - K_r)
    gen lp      = lchoose + K_r*ln(`alpha') + (`Nmgr'-K_r)*ln(1-`alpha')
    gen p_binom = exp(lp)

    twoway ///
        (bar p_emp K_r, barwidth(.9)color(red) lcolor(none)), ///
        ytitle("Probability mass") ylabel(,nogrid) xtitle("K_r") xlabel(0(10)60, nogrid) yscale(off) xline(57, lwidth(thick) lcolor(blue)) ///
      xtitle("")  legend(off)
	  
	  
	  graph save "pmf_emp.gph", replace

* Vector exports (pick what you need)
graph export "pmf_emp.pdf", replace      // good general-purpose vector
graph export "pmf_emp.eps", replace      // good for LaTeX workflows
graph export "pmf_emp.svg", replace      // good for web/editing (if supported in your Stata)
graph export "pmf_emp.emf", replace      // best for Word/PowerPoint on Windows
restore

* redisplay the empirical-PMF figure saved inside the preserve block above
graph use "pmf_emp.gph"





*******************************************************
* Plot 2: EN vs. observed
*******************************************************

* step 1: put Monkeys & Humans on same scale, calibrated to EN
clear all
set more off
use mwar_en_null_pivotal.dta,clear   // has z_sim
* NOTE(review): the variable saved above is z_sim_p, not z_sim — stale comment.

* Location & scale of bootstrap null (trim or robust if desired)
summ z_sim_p
scalar mu0_sim = r(mean)
scalar sd0_sim = r(sd)

di "MC EN mean = " mu0_sim "  SD = " sd0_sim

* Standardize simulated z's to N(0,1) under EN
* NOTE(review): z_stand is dead code — it is dropped by the -keep- below and
* never used.
gen z_stand = z_sim_p

* bring in the observed per-manager z's (z_obs_p)
merge m:1 mgrID using mwar_observed_pivotal.dta,nogen


* calibrate observed z's by the null's location and scale
gen z_cal = (z_obs_p - mu0_sim) / sd0_sim
gen source = "obs"

keep mgrID mgr_n z_cal z_sim_p source
* recenter calibrated z's at zero
sum z_cal
replace z_cal=z_cal-r(mean)

	******************************************************
	* 1) Estimate pi0 by central matching (|z| <= 0.5)
	*    pi0 ~= mean kernel density near 0 / mean N(0,1) density near 0
	******************************************************
	tempvar x f f0 center
	quietly kdensity z_sim_p, bw(.3) n(2000) gen(`x' `f')
	replace `f' = `f' + 1e-12            // avoid exact zeros before the ratio
	gen double `f0' = normalden(`x')     // theoretical N(0,1) density on the grid
	gen byte   `center' = abs(`x') <= 0.5
	egen double f_bar  = mean(cond(`center', `f', .))
	egen double f0_bar = mean(cond(`center', `f0', .))

	tempname PI0
	scalar `PI0' = min(1, max(0, f_bar / f0_bar))   // clamp to [0,1]

	******************************************************
	* 2) Identify target t = max_i |z_i|
	******************************************************
	gen double _absz = abs(z_sim_p)
	summarize _absz, meanonly
	tempname T
	scalar `T' = r(max)

	******************************************************
	* 3) Grid search minimal s >= 1 with lfdr_s(t) <= 0.50
	*    lfdr_s(t) = pi0 * phi(t) / [phi(t/s)/s]
	******************************************************
	tempname S S_MAX S_STEP S_STAR NUM DEN LFDRS
	scalar `S_MAX'  = 3
	scalar `S_STEP' = 0.001
	scalar `S_STAR' = .

	* numerator is constant in the loop
	scalar `NUM' = scalar(`PI0') * normalden(scalar(`T'))

	forvalues k = 0/`=floor((`S_MAX' - 1)/`S_STEP')' {
		scalar `S' = 1 + `k' * `S_STEP'
		scalar `DEN'   = normalden(scalar(`T')/scalar(`S')) / scalar(`S')
		scalar `LFDRS' = scalar(`NUM') / scalar(`DEN')
		if (scalar(`LFDRS') <= 0.50) {
			scalar `S_STAR' = scalar(`S')
			continue, break
		}
	}

	if missing(scalar(`S_STAR')) {
		di as err "No s in [1," %6.3f scalar(`S_MAX') "] achieved lfdr<=0.50 at t=" %6.3f scalar(`T')
		* BUGFIX: a bare -restore- here errored (no -preserve- is active in this
		* section — the earlier one was already restored), masking the intended
		* -exit-; -capture- makes it a safe no-op.
		capture restore
		exit
	}

	di as txt "pi0=" %6.3f scalar(`PI0') "   t=max|z|=" %6.3f scalar(`T') "   s*=" %6.3f scalar(`S_STAR')

* Step 2. Stack observed + simulated 

* choose a wide grid
			range Xgrid -5 5 800

			* observed density evaluated on the fixed grid
			* (removed unused tempvar f_hyp_fix from the original declaration)
			tempvar f_obs_fix f0_fix
			quietly kdensity z_cal , at(Xgrid) gen(`f_obs_fix') bw(.3)

		
			* EN (theoretical standard normal) on that grid
			gen double `f0_fix' = normalden(Xgrid)

			* line version: EN (thin) vs observed (thick)
			twoway ///
				(line `f0_fix'  Xgrid, lwidth(thin)) ///
				(line `f_obs_fix' Xgrid, lwidth(medthick)) , ///
			   xlabel(-5(2)5, nogrid noticks)  /* SAME treatment as left */ ///
				ylabel(, noticks nogrid nolabel) yscale(off) ///  
				graphregion(color(white)) legend(off) xtitle("")

				
				
			* make a baseline for rarea
			capture drop y0
			gen double y0 = 0

			* filled densities: red (EN), blue (observed)
			twoway ///
			(rarea y0 `f0_fix'   Xgrid, color(red%35)    lcolor(black)   lwidth(medthick)) ///
				(rarea y0 `f_obs_fix' Xgrid, color(blue%35)  lcolor(black)  lwidth(medthick)), ///
				xlabel(-4(1)4, nogrid  ) ///
				ylabel(, noticks nogrid nolabel) yscale(off) ///
				legend(off) xtitle("") graphregion(color(white)) plotregion(style(none))



		
		graph save "tail_enrich.gph", replace

* Vector exports (pick what you need)
graph export "tail_enrich.pdf", replace      // good general-purpose vector
graph export "tail_enrich.eps", replace      // good for LaTeX workflows
graph export "tail_enrich.svg", replace      // good for web/editing (if supported in your Stata)
graph export "tail_enrich.emf", replace      // best for Word/PowerPoint on Windows
* BUGFIX: no -preserve- is active here (the earlier one was restored above), so
* a bare -restore- errored out; -capture- makes it a safe no-op.
capture restore


	
* redisplay the saved empirical-PMF figure
graph use "pmf_emp.gph"
	
	



************************************
*Global FDR
***********************************


*******************************************************
* LFDR from empirical null (z_sim_p) vs observed (z_obs_p)
* Inputs:
*   mwar_observed_pivotal.dta  : mgrID z_obs_p p_two
*   mwar_en_null_pivotal.dta   : z_sim_p

*******************************************************

clear all
set more off

*-----------------------------
* SETTINGS
*-----------------------------
local lambda = 0.50          // Storey pi0 tuning (for p-values)
local ngrid  = 2001          // grid resolution
local zlo    = -6            // grid lower bound
local zhi    =  6            // grid upper bound

*-----------------------------
* 1) Estimate pi0 from observed p_two (Storey)
*-----------------------------
* BUGFIX: this file is saved above as mwar_observed_pivotal.dta; the original
* referenced mwar_observed_pivotal_12.22.dta, which this script never creates.
use mwar_observed_pivotal.dta, clear
keep mgrID z_obs_p p_two
drop if missing(mgrID, z_obs_p, p_two)

* Storey estimator: share of p-values above lambda, scaled by 1/(1-lambda)
gen byte gt = (p_two > `lambda')
quietly summarize gt, meanonly
scalar pi0 = min(1, r(mean)/(1-`lambda'))
display "pi0 (Storey, lambda=" %4.2f `lambda' ") = " %6.4f pi0

tempfile OBS
save `OBS', replace


*-----------------------------
* 2) Build common z grid dataset
*-----------------------------
clear
set obs `ngrid'
gen double zgrid = `zlo' + (_n-1)*((`zhi'-`zlo')/(`ngrid'-1))
gen byte isgrid = 1
tempfile GRID
save `GRID', replace


*-----------------------------
* 3) Estimate null density f0(z) on zgrid using z_sim_p
*    Trick: append null sample to the grid so kdensity
*    can use z_sim_p while evaluating at zgrid.
*-----------------------------
* (removed a dead "use `GRID', clear" that was immediately replaced by the
* next -use- without being used)
tempfile NULLSAMP
* BUGFIX: the null draws are saved above as mwar_en_null_pivotal.dta; the
* original referenced mwar_en_null_pivotal_12.22.dta, which is never created.
use mwar_en_null_pivotal.dta, clear
keep z_sim_p
drop if missing(z_sim_p)
gen byte isgrid = 0
save `NULLSAMP', replace

use `GRID', clear
append using `NULLSAMP'

* Evaluate density of z_sim_p at the grid points zgrid
kdensity z_sim_p, at(zgrid) gen(f0) nograph

* keep only the grid rows, which now carry f0
keep if isgrid==1
keep zgrid f0
sort zgrid
tempfile GRID0
save `GRID0', replace


*-----------------------------
* 4) Estimate observed density f(z) on same zgrid using z_obs_p
*    Again: append observed sample to the grid dataset.
*-----------------------------
* (removed a dead "use `GRID0' + gen isgrid" pair that was immediately
* discarded by the next -use-)
tempfile OBSSAMP
use `OBS', clear
keep mgrID z_obs_p p_two
gen byte isgrid = 0
save `OBSSAMP', replace

use `GRID0', clear
gen byte isgrid = 1
append using `OBSSAMP'

kdensity z_obs_p, at(zgrid) gen(f) nograph

* Compute lfdr on grid rows only
keep if isgrid==1
keep zgrid f0 f
gen double lfdr_grid = (pi0 * f0) / f
* BUGFIX: where the observed density f is 0 or missing (far tails beyond the
* data), pi0*f0/f is missing and Stata's max(., 0) = 0 clamped it to 0 —
* i.e. "certainly non-null", the wrong direction. Treat those grid points
* conservatively as lfdr = 1 instead.
replace lfdr_grid = 1 if missing(lfdr_grid) | f <= 0
replace lfdr_grid = min(max(lfdr_grid, 0), 1)

sort zgrid
tempfile LFDRGRID
save `LFDRGRID', replace


*-----------------------------
* 5) Map lfdr back to each manager by nearest grid point
*-----------------------------
use `OBS', clear
* clamp z to [zlo, zhi] for mapping
gen double zc = min(max(z_obs_p, `zlo'), `zhi')

* nearest grid index: h is the grid spacing
scalar h = (`zhi' - `zlo')/(`ngrid' - 1)
gen long idx = round((zc - `zlo')/h)
replace idx = max(0, min(idx, `ngrid' - 1))
gen double zgrid = `zlo' + idx*h

* NOTE(review): this merge keys on a computed double; it works only because
* zgrid is rebuilt here with the identical floating-point expression used to
* build the grid. keep(match) silently drops any manager whose zgrid fails
* exact equality — consider merging on the integer idx instead; confirm no
* rows are lost.
merge m:1 zgrid using `LFDRGRID', nogen keep(match)

rename lfdr_grid lfdr_hat
drop zc idx zgrid f0 f




*-----------------------------
* 6)  "tail null share" 
*-----------------------------
* flag the nominal 5% two-sided tail
gen byte tail = (p_two <= 0.05)

count if tail
scalar Ktail = r(N)

* mean local fdr among tail managers; 1 - mean(lfdr) is the implied share of
* genuine (non-null) signals within the tail
quietly summarize lfdr_hat if tail, meanonly
display "Tail size K (p<=0.05) = " %8.0f Ktail
display "Mean lfdr in tail      = " %6.3f r(mean)
display "Sum lfdr in tail       = " %8.2f (r(mean)*Ktail)
display "Implied non-null share = " %6.3f (1 - r(mean))





