------------------------------------------------------------------------------------------------
      name:  <unnamed>
       log:  /Users/carlosmendez/Documents/GitHub/starter-academic-v501/content/post/stata_doubl
> e_lasso/analysis.log
  log type:  text
 opened on:  24 May 2026, 18:55:51

. 
. * Check required packages are installed (already in user's ado/plus).
. local needed rlasso cvlasso lasso2 pdslasso coefplot

. foreach pkg of local needed {
  2.     * A nonzero _rc after -capture which- means the command was not found.
.     capture which `pkg'
  3.     if _rc != 0 {
  4.         display as error "Required package not installed: `pkg'"
  5.         display as error "Install via:  ssc install lassopack"
  6.         display as error "             ssc install pdslasso"
  7.         display as error "             ssc install coefplot"
  8.         exit 198
  9.     }
 10. }

. 
. * Site palette (dark theme, mirrors R version's ggplot theme_site).
. *   DARK_BG  = #0f1729 = "15 23 41"
. *   DARK_PNL = #1f2b5e = "31 43 94"
. *   LIGHT_TX = #c8d0e0 = "200 208 224"
. *   LIGHTER  = #e8ecf2 = "232 236 242"
. *   STEEL    = #6a9bcc = "106 155 204"
. *   ORANGE   = #d97757 = "217 119 87"
. *   TEAL     = #00d4c8 = "0 212 200"
. *   LT_ORG   = #e8956a = "232 149 106"
. global C_BG    "15 23 41"

. global C_PANEL "31 43 94"

. global C_GRID  "31 43 94"

. global C_TEXT  "200 208 224"

. global C_TXTHI "232 236 242"

. global C_STEEL "106 155 204"

. global C_ORG   "217 119 87"

. global C_TEAL  "0 212 200"

. global C_LTORG "232 149 106"

. 
. * Standard graph-region options for the dark theme.
. global DARKBG ///
>     graphregion(fcolor("$C_BG") ifcolor("$C_BG") lcolor("$C_BG") ilcolor("$C_BG")) ///
>     plotregion(fcolor("$C_BG") lcolor("$C_BG"))

. 
. 
. * ── 1. Data loading (six CSVs from GitHub raw URLs) ───────────────
. 
. di _n(2) "========================================"


========================================

. di "STEP 1 - DATA LOADING (six CSVs over HTTPS)"
STEP 1 - DATA LOADING (six CSVs over HTTPS)

. di "========================================"
========================================

. 
. local BASE = "https://raw.githubusercontent.com/cmg777/starter-academic-v501/master/content/po
> st/r_double_lasso/data"

. 
. tempfile linear partialled ctrl_v ctrl_p ctrl_m

. 
. * (a) Raw first-differenced outcomes and treatments + state IDs.
. *     Cols: state, Dyv, Dxv, Dyp, Dxp, Dym, Dxm
. import delimited "`BASE'/levitt_linear.csv", clear varnames(1) case(preserve)
(encoding automatically selected: ISO-8859-2)
(7 vars, 576 obs)

. gen long obs_id = _n

. di "  levitt_linear.csv         : `=_N' obs x `=c(k)' cols"
  levitt_linear.csv         : 576 obs x 8 cols

. save "`linear'"
file /var/folders/tq/t98kb27n6djgrh085g476yhc0000gn/T//S_64524.000001 saved as .dta format

. 
. * (b) Partialled (year-FE-removed) outcomes and treatments.
. *     Cols: state, DxV, DyV, DxP, DyP, DxM, DyM
. import delimited "`BASE'/levitt_partialled.csv", clear varnames(1) case(preserve)
(encoding automatically selected: ISO-8859-1)
(7 vars, 576 obs)

. * drop the duplicate state col so the merge is clean
. drop state

. gen long obs_id = _n

. di "  levitt_partialled.csv     : `=_N' obs x `=c(k)' cols (incl obs_id)"
  levitt_partialled.csv     : 576 obs x 7 cols (incl obs_id)

. save "`partialled'"
file /var/folders/tq/t98kb27n6djgrh085g476yhc0000gn/T//S_64524.000002 saved as .dta format

. 
. * (c)-(e) Three 284-column control matrices, one per outcome.
. *         Column names in source CSV use ^, *, ( ) etc. which Stata
. *         sanitises on import; we then rename to zv1..zv284 (etc.)
. *         so downstream code can address them uniformly.
. * The one-letter prefix o picks both the CSV file-name stem and the stub of the
. * renamed variables, so file <long> yields z<o>1..z<o>k. The progress message
. * builds both ends of that range from o.
. foreach o in v p m {
  2.     if "`o'" == "v" local long "viol"
  3.     if "`o'" == "p" local long "prop"
  4.     if "`o'" == "m" local long "murd"
  5.     import delimited "`BASE'/levitt_controls_`long'.csv", clear varnames(1)
  6.     local k = 0
  7.     foreach var of varlist _all {
  8.         local ++k
  9.         rename `var' z`o'`k'
 10.     }
 11.     di "  levitt_controls_`long'.csv : `=_N' obs x `k' cols (renamed z`o'1..z`o'`k')"
 12.     gen long obs_id = _n
 13.     save "`ctrl_`o''"
 14. }
(encoding automatically selected: ISO-8859-1)
(284 vars, 576 obs)
  levitt_controls_viol.csv : 576 obs x 284 cols (renamed zv1..zv284)
file /var/folders/tq/t98kb27n6djgrh085g476yhc0000gn/T//S_64524.000003 saved as .dta format
(encoding automatically selected: ISO-8859-1)
(284 vars, 576 obs)
  levitt_controls_prop.csv : 576 obs x 284 cols (renamed zp1..zp284)
file /var/folders/tq/t98kb27n6djgrh085g476yhc0000gn/T//S_64524.000004 saved as .dta format
(encoding automatically selected: ISO-8859-1)
(284 vars, 576 obs)
  levitt_controls_murd.csv : 576 obs x 284 cols (renamed zm1..zm284)
file /var/folders/tq/t98kb27n6djgrh085g476yhc0000gn/T//S_64524.000005 saved as .dta format

. 
. * Combine into one working dataset.
. use "`linear'", clear

. merge 1:1 obs_id using "`partialled'", nogen

    Result                      Number of obs
    -----------------------------------------
    Not matched                             0
    Matched                               576  
    -----------------------------------------

. merge 1:1 obs_id using "`ctrl_v'", nogen

    Result                      Number of obs
    -----------------------------------------
    Not matched                             0
    Matched                               576  
    -----------------------------------------

. merge 1:1 obs_id using "`ctrl_p'", nogen

    Result                      Number of obs
    -----------------------------------------
    Not matched                             0
    Matched                               576  
    -----------------------------------------

. merge 1:1 obs_id using "`ctrl_m'", nogen

    Result                      Number of obs
    -----------------------------------------
    Not matched                             0
    Matched                               576  
    -----------------------------------------

. 
. * Sanity check: must be 576 obs, 48 states.
. assert _N == 576

. qui levelsof state, local(states)

. local nstates : word count `states'

. assert `nstates' == 48

. 
. di _n "  Merged working dataset: `=_N' obs, `nstates' clusters (states)"

  Merged working dataset: 576 obs, 48 clusters (states)

. 
. 
. * ── 2. Convenience: outcome metadata ─────────────────────────────
. 
. * For each outcome we have:
. *   raw differenced:   y = Dy[v/p/m]   d = Dx[v/p/m]
. *   year-FE partialled:y = Dy[V/P/M]   d = Dx[V/P/M]
. *   284 partialled controls: z[v/p/m]1..z[v/p/m]284
. *
. * The partialling step (done in the original Matlab pre-processing)
. * absorbs year fixed effects via Frisch-Waugh-Lovell: every variable
. * v becomes v - T (T'T)^-1 T' v, where T is the matrix of year
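. * dummies. Regressions on the partialled vars give the same point
. * estimates as regressing the raw differences on the year dummies;
. * only the degrees of freedom differ, because the partialled
. * regressions do not count the absorbed year dummies as estimated
. * parameters.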
. 
. * Will use these foreach loops repeatedly.
. * Iteration uses short prefixes v/p/m (matching the renamed control vars
. * zv1..zv284, zp1..zp284, zm1..zm284).  Look-up helpers below map the
. * short prefix to outcome label and raw/partialled variable names.
. *
. *   prefix  -> label             y (raw)  d (raw)  y (partialled)  d (partialled)
. *   v       -> Violent crime     Dyv      Dxv      DyV             DxV
. *   p       -> Property crime    Dyp      Dxp      DyP             DxP
. *   m       -> Murder            Dym      Dxm      DyM             DxM
. 
. 
. * ── 3. Cluster-robust SEs ─────────────────────────────────────────
. 
. * Stata's vce(cluster state) on -regress- applies exactly the
. * small-sample correction used in the Fitzgerald et al. (2026)
. * replication code:
. *
. *   V_cluster = (N-1)/(N-k) * G/(G-1) * (X'X)^-1 . S . (X'X)^-1
. *
. * with G = 48 clusters of 12 observations each. No extra code needed.
. 
. 
. * ── 4. Estimator A - First-difference OLS (no controls) ──────────
. 
. * ESTIMAND. Throughout the five estimators below the parameter of
. * interest is alpha, the average partial effect of (first-differenced)
. * effective abortion rate on (first-differenced) state crime rate.
. * Identification rests on:
. *   (1) conditional independence given the 284 partialled controls; and
. *   (2) parallel trends in levels (state FEs absorbed by differencing,
. *       year FEs by the partialling step).
. 
. di _n(2) "========================================"


========================================

. di "STEP 4 - FIRST-DIFFERENCE OLS (no controls)"
STEP 4 - FIRST-DIFFERENCE OLS (no controls)

. di "========================================"
========================================

. 
. * Storage for downstream table.
. tempname FD_b FD_se FD_n

. mat `FD_b'  = J(3, 1, .)

. mat `FD_se' = J(3, 1, .)

. 
. local labs `""Violent crime" "Property crime" "Murder""'

. local row = 0

. foreach o in v p m {
  2.     local ++row
  3.     * Label by position; the raw first-differenced variables are Dy<o> and Dx<o>.
.     local lab : word `row' of `labs'
  4.     local Y "Dy`o'"
  5.     local D "Dx`o'"
  6.     display _n "  Outcome: `lab'  (y=`Y', d=`D')"
  7.     quietly regress `Y' `D', noconstant vce(cluster state)
  8.     matrix `FD_b'[`row',1]  = _b[`D']
  9.     matrix `FD_se'[`row',1] = _se[`D']
 10.     display as text "    alpha_hat = " as result %9.4f _b[`D'] ///
>        as text "    SE = " as result %9.4f _se[`D']
 11. }

  Outcome: Violent crime  (y=Dyv, d=Dxv)
    alpha_hat =   -0.1521    SE =    0.0337

  Outcome: Property crime  (y=Dyp, d=Dxp)
    alpha_hat =   -0.1084    SE =    0.0219

  Outcome: Murder  (y=Dym, d=Dxm)
    alpha_hat =   -0.2039    SE =    0.0667

. 
. 
. * ── 5. Estimator B - OLS with all 284 controls ───────────────────
. 
. di _n(2) "========================================"


========================================

. di "STEP 5 - OLS WITH ALL 284 CONTROLS (kitchen-sink)"
STEP 5 - OLS WITH ALL 284 CONTROLS (kitchen-sink)

. di "========================================"
========================================

. di "Feasible because p=284 < n=576: OLS technically inverts. But"
Feasible because p=284 < n=576: OLS technically inverts. But

. di "many controls are near-collinear, so SEs balloon. This is what"
many controls are near-collinear, so SEs balloon. This is what

. di "motivates LASSO: keep the controls that matter, drop the rest."
motivates LASSO: keep the controls that matter, drop the rest.

. 
. tempname OLS_b OLS_se OLS_n

. mat `OLS_b'  = J(3, 1, .)

. mat `OLS_se' = J(3, 1, .)

. mat `OLS_n'  = J(3, 1, .)

. 
. local labs `""Violent crime" "Property crime" "Murder""'

. local row = 0

. foreach o in v p m {
  2.     local ++row
  3.     * Partialled variables carry the upper-case prefix: DyV/DxV, DyP/DxP, DyM/DxM.
.     local lab : word `row' of `labs'
  4.     local S = strupper("`o'")
  5.     local Y "Dy`S'"
  6.     local D "Dx`S'"
  7.     display _n "  Outcome: `lab'  (y=`Y', d=`D')"
  8.     quietly regress `Y' `D' z`o'1-z`o'284, noconstant vce(cluster state)
  9.     matrix `OLS_b'[`row',1]  = _b[`D']
 10.     matrix `OLS_se'[`row',1] = _se[`D']
 11.     matrix `OLS_n'[`row',1]  = 284
 12.     display as text "    alpha_hat = " as result %9.4f _b[`D'] ///
>        as text "    SE = " as result %9.4f _se[`D'] ///
>        as text "    (using 284 controls)"
 13. }

  Outcome: Violent crime  (y=DyV, d=DxV)
    alpha_hat =    0.0134    SE =    0.7149    (using 284 controls)

  Outcome: Property crime  (y=DyP, d=DxP)
    alpha_hat =   -0.1950    SE =    0.2236    (using 284 controls)

  Outcome: Murder  (y=DyM, d=DxM)
    alpha_hat =    2.3411    SE =    2.7831    (using 284 controls)

. 
. 
. * ── 6. Estimator C - Post-Structural LASSO (PSL) ─────────────────
. 
. * PSL = one LASSO on (d, X) -> y with the treatment d FORCED IN
. * (penalty.factor=0 in R; pnotpen() in Stata's rlasso). The selected
. * controls are passed to plain OLS for the final alpha.
. *
. * DESIGN NOTE: The R companion uses cv.glmnet for PSL (3-fold CV).
. * Stata's cvlasso supports notpen(), but its CV path through 100+
. * lambda values with notpen-partialling is dramatically slower than
. * cv.glmnet's coordinate descent. We use the Belloni-Chernozhukov-
. * Hansen RIGOROUS penalty via rlasso instead, with d pinned via
. * pnotpen(). This is fast, deterministic, and answers the same
. * conceptual question PSL was designed to ask: "one LASSO with the
. * treatment forced in, then post-OLS". The penalty rule differs
. * across the two implementations but the recipe is identical.
. *
. * WHY POST-OLS, not LASSO coefficients? LASSO shrinks the
. * coefficients of variables it keeps toward zero - that introduces
. * bias in alpha. Refitting with plain OLS on the selected variables
. * removes the shrinkage. Throughout this script LASSO is used for
. * SELECTION only; the final alpha always comes from OLS.
. 
. di _n(2) "========================================"


========================================

. di "STEP 6 - POST-STRUCTURAL LASSO (PSL)"
STEP 6 - POST-STRUCTURAL LASSO (PSL)

. di "========================================"
========================================

. di "One rlasso on (d, X) -> y with d pinned (pnotpen),"
One rlasso on (d, X) -> y with d pinned (pnotpen),

. di "then OLS on d + selected controls. Rigorous penalty (c=1.1, gamma=0.05)."
then OLS on d + selected controls. Rigorous penalty (c=1.1, gamma=0.05).

. 
. tempname PSL_b PSL_se PSL_n

. mat `PSL_b'  = J(3, 1, .)

. mat `PSL_se' = J(3, 1, .)

. mat `PSL_n'  = J(3, 1, .)

. 
. local labs `""Violent crime" "Property crime" "Murder""'

. local row = 0

. foreach o in v p m {
  2.     local ++row
  3.     local lab : word `row' of `labs'
  4.     local S = strupper("`o'")
  5.     local Y "Dy`S'"
  6.     local D "Dx`S'"
  7.     display _n "  Outcome: `lab'  (y=`Y', d=`D')"
  8.     quietly rlasso `Y' `D' z`o'1-z`o'284, nocons pnotpen(`D') c(1.1) gamma(0.05)
  9.     * The unpenalised treatment is listed in e(selected) as well; remove it so
.     * that only controls are counted and handed to the post-OLS step.
.     local ctrls "`e(selected)'"
 10.     local ctrls : list ctrls - D
 11.     local nsel : word count `ctrls'
 12.     quietly regress `Y' `D' `ctrls', noconstant vce(cluster state)
 13.     matrix `PSL_b'[`row',1]  = _b[`D']
 14.     matrix `PSL_se'[`row',1] = _se[`D']
 15.     matrix `PSL_n'[`row',1]  = `nsel'
 16.     display as text "    alpha_hat = " as result %9.4f _b[`D'] ///
>        as text "    SE = " as result %9.4f _se[`D'] ///
>        as text "    | `nsel' controls selected"
 17. }

  Outcome: Violent crime  (y=DyV, d=DxV)
    alpha_hat =   -0.1553    SE =    0.0330    | 0 controls selected

  Outcome: Property crime  (y=DyP, d=DxP)
    alpha_hat =   -0.0665    SE =    0.0244    | 1 controls selected

  Outcome: Murder  (y=DyM, d=DxM)
    alpha_hat =   -0.2397    SE =    0.0635    | 1 controls selected

. 
. 
. * ── 7. Estimator D - Double LASSO, rigorous penalty (rlasso) ─────
. 
. * Belloni-Chernozhukov-Hansen Double LASSO with the rigorous penalty:
. *
. *   1. rlasso y on X           -> I_y = selected outcome-equation indices
. *   2. rlasso d on X           -> I_d = selected treatment-equation indices
. *   3. OLS y on d + X[I_y ∪ I_d]   with state-clustered SEs
. *
. * The rigorous penalty is data-driven (Belloni, Chen, Chernozhukov &
. * Hansen 2012) and chosen so that selection-error noise is dominated
. * by the signal. It is much more parsimonious than CV.
. *
. * Penalty constants c=1.1 and gamma=0.05 match the JAE (2026)
. * replication code (readdata_all_OLS.R lines 585, 653) and the
. * R companion's hdm::rlasso call.
. 
. di _n(2) "========================================"


========================================

. di "STEP 7 - DOUBLE LASSO, RIGOROUS PENALTY (rlasso)"
STEP 7 - DOUBLE LASSO, RIGOROUS PENALTY (rlasso)

. di "========================================"
========================================

. di "Two rlasso calls (y on X, d on X), union of selected, then post-OLS."
Two rlasso calls (y on X, d on X), union of selected, then post-OLS.

. di "'Rigorous' = lambda from Belloni et al. (2012) theory, not CV."
'Rigorous' = lambda from Belloni et al. (2012) theory, not CV.

. 
. tempname DLR_b DLR_se DLR_n DLR_Iy DLR_Id DLR_U

. mat `DLR_b'  = J(3, 1, .)

. mat `DLR_se' = J(3, 1, .)

. mat `DLR_n'  = J(3, 1, .)

. mat `DLR_Iy' = J(3, 1, .)

. mat `DLR_Id' = J(3, 1, .)

. mat `DLR_U'  = J(3, 1, .)

. 
. local labs `""Violent crime" "Property crime" "Murder""'

. local row = 0

. foreach o in v p m {
  2.     local ++row
  3.     local lab : word `row' of `labs'
  4.     local S = strupper("`o'")
  5.     local Y "Dy`S'"
  6.     local D "Dx`S'"
  7.     display _n "  Outcome: `lab'  (y=`Y', d=`D')"
  8. 
.     * Selection for the outcome equation: y on X.
.     quietly rlasso `Y' z`o'1-z`o'284, nocons c(1.1) gamma(0.05)
  9.     local Iy "`e(selected)'"
 10.     local nIy : word count `Iy'
 11. 
.     * Selection for the treatment equation: d on X.
.     quietly rlasso `D' z`o'1-z`o'284, nocons c(1.1) gamma(0.05)
 12.     local Id "`e(selected)'"
 13.     local nId : word count `Id'
 14. 
.     * Post-OLS on d plus the union of both selected sets. When the union is
.     * empty the macro U expands to nothing, which is the regression of y on d
.     * alone, so no separate fall-back branch is required.
.     local U : list Iy | Id
 15.     local nU : word count `U'
 16.     quietly regress `Y' `D' `U', noconstant vce(cluster state)
 17. 
.     matrix `DLR_b'[`row',1]  = _b[`D']
 18.     matrix `DLR_se'[`row',1] = _se[`D']
 19.     matrix `DLR_n'[`row',1]  = `nU'
 20.     matrix `DLR_Iy'[`row',1] = `nIy'
 21.     matrix `DLR_Id'[`row',1] = `nId'
 22.     matrix `DLR_U'[`row',1]  = `nU'
 23. 
.     display as text "    |I_y| = " as result %3.0f `nIy' ///
>        as text "  |I_d| = " as result %3.0f `nId' ///
>        as text "  |union| = " as result %3.0f `nU'
 24.     display as text "    alpha_hat = " as result %9.4f _b[`D'] ///
>        as text "    SE = " as result %9.4f _se[`D']
 25. }

  Outcome: Violent crime  (y=DyV, d=DxV)
    |I_y| =   0  |I_d| =   8  |union| =   8
    alpha_hat =   -0.1744    SE =    0.1155

  Outcome: Property crime  (y=DyP, d=DxP)
    |I_y| =   3  |I_d| =  14  |union| =  17
    alpha_hat =   -0.1144    SE =    0.0470

  Outcome: Murder  (y=DyM, d=DxM)
    |I_y| =   1  |I_d| =  12  |union| =  13
    alpha_hat =   -0.1229    SE =    0.1404

. 
. 
. * ── 8. Estimator E - Double LASSO, CV penalty (cvlasso) ──────────
. 
. * Same three steps as section 7, but each LASSO is tuned by 3-fold
. * CV (matching Fitzgerald et al. 2026 footnote 2). Lambda is picked
. * to minimise out-of-sample MSE (lopt = "lambda at MSE minimum").
. *
. * RUNTIME NOTE. Stata's cvlasso at p/n ≈ 0.5 is dramatically slower
. * than R's cv.glmnet: a single call with the default lcount(100)
. * takes 5+ minutes per outcome-equation, which makes the 6-call DL-CV
. * pipeline impractical. We use lcount(10) to get the CV grid down to
. * 10 lambda values, sacrificing precision in lambda selection in
. * exchange for finishing in under 60 seconds per call. The Stata-vs-R
. * drift in Section 14 captures the consequence: DL-CV's α and selected
. * set sizes diverge more than the other Tier-C estimators do.
. 
. di _n(2) "========================================"


========================================

. di "STEP 8 - DOUBLE LASSO, CV PENALTY (cvlasso, 3-fold, lcount=10)"
STEP 8 - DOUBLE LASSO, CV PENALTY (cvlasso, 3-fold, lcount=10)

. di "========================================"
========================================

. 
. tempname DLC_b DLC_se DLC_n DLC_Iy DLC_Id DLC_U

. mat `DLC_b'  = J(3, 1, .)

. mat `DLC_se' = J(3, 1, .)

. mat `DLC_n'  = J(3, 1, .)

. mat `DLC_Iy' = J(3, 1, .)

. mat `DLC_Id' = J(3, 1, .)

. mat `DLC_U'  = J(3, 1, .)

. 
. local row = 0

. * Double LASSO with CV-tuned penalties: cvlasso y on X, cvlasso d on X, then
. * OLS of y on d plus the union of the two selected sets (state-clustered SEs).
. *
. * FIX: -postresults- is required for cvlasso to leave the lasso2 fit at lopt in
. * e(). Without it e(selected) read after the call is empty, so every selected
. * set has size 0 and the post-OLS step degenerates to the regression of y on d.
. * NOTE: output logged by runs that predate this option (all |I_y| = |I_d| = 0)
. * reflects that empty macro, not the CV-selected model; re-run to refresh it.
. foreach o in v p m {
  2.     local ++row
  3.     local lab = cond("`o'"=="v","Violent crime",cond("`o'"=="p","Property crime","Murder"))
  4.     local Y = cond("`o'"=="v","DyV", cond("`o'"=="p","DyP","DyM"))
  5.     local D = cond("`o'"=="v","DxV", cond("`o'"=="p","DxP","DxM"))
  6.     di _n "  Outcome: `lab'  (y=`Y', d=`D')"
  7. 
.     * Step 1: CV LASSO y on X; selected set taken from the posted lopt model.
.     qui cvlasso `Y' z`o'1-z`o'284, nfolds(3) seed(20260520) lopt lglmnet lcount(10) postresults
  8.     local Iy "`e(selected)'"
  9.     local nIy : word count `Iy'
 10. 
.     * Step 2: CV LASSO d on X (same folds seed as step 1).
.     qui cvlasso `D' z`o'1-z`o'284, nfolds(3) seed(20260520) lopt lglmnet lcount(10) postresults
 11.     local Id "`e(selected)'"
 12.     local nId : word count `Id'
 13. 
.     * Step 3: union of selected, then post-OLS.
.     local U : list Iy | Id
 14.     local nU : word count `U'
 15. 
.     if `nU' > 0 {
 16.         qui regress `Y' `D' `U', noconstant vce(cluster state)
 17.     }
 18.     else {
 19.         * No controls survived in either equation: regress y on d alone.
.         qui regress `Y' `D', noconstant vce(cluster state)
 20.     }
 21. 
.     mat `DLC_b'[`row',1]  = _b[`D']
 22.     mat `DLC_se'[`row',1] = _se[`D']
 23.     mat `DLC_n'[`row',1]  = `nU'
 24.     mat `DLC_Iy'[`row',1] = `nIy'
 25.     mat `DLC_Id'[`row',1] = `nId'
 26.     mat `DLC_U'[`row',1]  = `nU'
 27. 
.     di as text "    |I_y| = " as result %3.0f `nIy' ///
>        as text "  |I_d| = " as result %3.0f `nId' ///
>        as text "  |union| = " as result %3.0f `nU'
 28.     di as text "    alpha_hat = " as result %9.4f _b[`D'] ///
>        as text "    SE = " as result %9.4f _se[`D']
 29. }

  Outcome: Violent crime  (y=DyV, d=DxV)
Warning: lopt is at the limit of the lambda range.
Warning: lse is at the limit of the lambda range.
Warning: lopt is at the limit of the lambda range.
Warning: lse is at the limit of the lambda range.
    |I_y| =   0  |I_d| =   0  |union| =   0
    alpha_hat =   -0.1553    SE =    0.0330

  Outcome: Property crime  (y=DyP, d=DxP)
Warning: lse is at the limit of the lambda range.
    |I_y| =   0  |I_d| =   0  |union| =   0
    alpha_hat =   -0.1015    SE =    0.0218

  Outcome: Murder  (y=DyM, d=DxM)
Warning: lopt is at the limit of the lambda range.
Warning: lse is at the limit of the lambda range.
Warning: lopt is at the limit of the lambda range.
    |I_y| =   0  |I_d| =   0  |union| =   0
    alpha_hat =   -0.2061    SE =    0.0514

. 
. 
. * ── 9. Build results_table2.csv (5 estimators x 3 outcomes) ──────
. 
. di _n(2) "========================================"


========================================

. di "STEP 9 - REPLICATION OF PAPER TABLE 2"
STEP 9 - REPLICATION OF PAPER TABLE 2

. di "========================================"
========================================

. 
. preserve

. clear

. set obs 15
Number of observations (_N) was 0, now 15.

. gen str14 method   = ""
(15 missing values generated)

. gen str16 outcome  = ""
(15 missing values generated)

. gen double estimate  = .
(15 missing values generated)

. gen double std_error = .
(15 missing values generated)

. gen long   n_selected = .
(15 missing values generated)

. 
. local outcomes "viol prop murd"

. local methods  "FD OLS PSL DLR DLC"

. local mlabels  `""First diff" "OLS (full)" "PSL" "DL (rigorous)" "DL (CV)""'

. 
. * Fill the table row by row (outcome-major, method-minor); i is the running
. * row index. The earlier label look-up loop was removed: it read macros that
. * are never defined and its result was overwritten before being used.
. local i = 0

. local rownames : word count `outcomes'

. forvalues oi = 1/3 {
  2.     local o : word `oi' of `outcomes'
  3.     if "`o'" == "viol"      local olab "Violent crime"
  4.     else if "`o'" == "prop" local olab "Property crime"
  5.     else if "`o'" == "murd" local olab "Murder"
  6.     forvalues mi = 1/5 {
  7.         local ++i
  8.         local m : word `mi' of `methods'
  9.         local mlab : word `mi' of `mlabels'
 10.         * The method code m (FD, OLS, PSL, DLR, DLC) is also the stem of the
.         * tempname macros, so nested expansion of m plus _b / _se / _n yields
.         * the matrix that holds this method's results.
.         quietly {
 11.             replace method    = "`mlab'"         in `i'
 12.             replace outcome   = "`olab'"         in `i'
 13.             replace estimate  = ``m'_b'[`oi',1]  in `i'
 14.             replace std_error = ``m'_se'[`oi',1] in `i'
 15.             * First differences use no controls; the count is the literal 0
.             * and no FD count matrix is read.
.             if "`m'" == "FD" replace n_selected = 0 in `i'
 16.             else replace n_selected = ``m'_n'[`oi',1] in `i'
 17.         }
 18.     }
 19. }

. 
. gen double ci_lo = estimate - 1.96 * std_error

. gen double ci_hi = estimate + 1.96 * std_error

. order method outcome estimate std_error n_selected ci_lo ci_hi

. list, sepby(outcome) abbreviate(20)

     +-----------------------------------------------------------------------------------+
  1. |        method |        outcome |   estimate | std_error | n_selected |      ci_lo |
     |    First diff |  Violent crime | -.15209745 | .03367644 |          0 | -.21810328 |
     |-----------------------------------------------------------------------------------|
     |                                         ci_hi                                     |
     |                                    -.08609161                                     |
     +-----------------------------------------------------------------------------------+

     +-----------------------------------------------------------------------------------+
  2. |        method |        outcome |   estimate | std_error | n_selected |      ci_lo |
     |    OLS (full) |  Violent crime |  .01338395 | .71491436 |        284 | -1.3878482 |
     |-----------------------------------------------------------------------------------|
     |                                         ci_hi                                     |
     |                                     1.4146161                                     |
     +-----------------------------------------------------------------------------------+

     +-----------------------------------------------------------------------------------+
  3. |        method |        outcome |   estimate | std_error | n_selected |      ci_lo |
     |           PSL |  Violent crime |    -.15529 | .03297352 |          0 |  -.2199181 |
     |-----------------------------------------------------------------------------------|
     |                                         ci_hi                                     |
     |                                     -.0906619                                     |
     +-----------------------------------------------------------------------------------+

     +-----------------------------------------------------------------------------------+
  4. |        method |        outcome |   estimate | std_error | n_selected |      ci_lo |
     | DL (rigorous) |  Violent crime | -.17439471 | .11554708 |          8 | -.40086699 |
     |-----------------------------------------------------------------------------------|
     |                                         ci_hi                                     |
     |                                     .05207757                                     |
     +-----------------------------------------------------------------------------------+

     +-----------------------------------------------------------------------------------+
  5. |        method |        outcome |   estimate | std_error | n_selected |      ci_lo |
     |       DL (CV) |  Violent crime |    -.15529 | .03297352 |          0 |  -.2199181 |
     |-----------------------------------------------------------------------------------|
     |                                         ci_hi                                     |
     |                                     -.0906619                                     |
     +-----------------------------------------------------------------------------------+

     +-----------------------------------------------------------------------------------+
  6. |        method |        outcome |   estimate | std_error | n_selected |      ci_lo |
     |    First diff | Property crime | -.10837626 | .02193445 |          0 | -.15136778 |
     |-----------------------------------------------------------------------------------|
     |                                         ci_hi                                     |
     |                                    -.06538474                                     |
     +-----------------------------------------------------------------------------------+

     +-----------------------------------------------------------------------------------+
  7. |        method |        outcome |   estimate | std_error | n_selected |      ci_lo |
     |    OLS (full) | Property crime | -.19499381 | .22356196 |        284 | -.63317526 |
     |-----------------------------------------------------------------------------------|
     |                                         ci_hi                                     |
     |                                     .24318764                                     |
     +-----------------------------------------------------------------------------------+

     +-----------------------------------------------------------------------------------+
  8. |        method |        outcome |   estimate | std_error | n_selected |      ci_lo |
     |           PSL | Property crime | -.06654344 | .02443351 |          1 | -.11443311 |
     |-----------------------------------------------------------------------------------|
     |                                         ci_hi                                     |
     |                                    -.01865376                                     |
     +-----------------------------------------------------------------------------------+

     +-----------------------------------------------------------------------------------+
  9. |        method |        outcome |   estimate | std_error | n_selected |      ci_lo |
     | DL (rigorous) | Property crime | -.11441552 | .04695949 |         17 | -.20645612 |
     |-----------------------------------------------------------------------------------|
     |                                         ci_hi                                     |
     |                                    -.02237493                                     |
     +-----------------------------------------------------------------------------------+

     +-----------------------------------------------------------------------------------+
 10. |        method |        outcome |   estimate | std_error | n_selected |      ci_lo |
     |       DL (CV) | Property crime | -.10146893 | .02184183 |          0 | -.14427892 |
     |-----------------------------------------------------------------------------------|
     |                                         ci_hi                                     |
     |                                    -.05865895                                     |
     +-----------------------------------------------------------------------------------+

     +-----------------------------------------------------------------------------------+
 11. |        method |        outcome |   estimate | std_error | n_selected |      ci_lo |
     |    First diff |         Murder | -.20386472 | .06672788 |          0 | -.33465136 |
     |-----------------------------------------------------------------------------------|
     |                                         ci_hi                                     |
     |                                    -.07307808                                     |
     +-----------------------------------------------------------------------------------+

     +-----------------------------------------------------------------------------------+
 12. |        method |        outcome |   estimate | std_error | n_selected |      ci_lo |
     |    OLS (full) |         Murder |  2.3411482 | 2.7830742 |        284 | -3.1136773 |
     |-----------------------------------------------------------------------------------|
     |                                         ci_hi                                     |
     |                                     7.7959736                                     |
     +-----------------------------------------------------------------------------------+

     +-----------------------------------------------------------------------------------+
 13. |        method |        outcome |   estimate | std_error | n_selected |      ci_lo |
     |           PSL |         Murder | -.23974556 | .06349433 |          1 | -.36419445 |
     |-----------------------------------------------------------------------------------|
     |                                         ci_hi                                     |
     |                                    -.11529667                                     |
     +-----------------------------------------------------------------------------------+

     +-----------------------------------------------------------------------------------+
 14. |        method |        outcome |   estimate | std_error | n_selected |      ci_lo |
     | DL (rigorous) |         Murder | -.12290333 | .14041769 |         13 |   -.398122 |
     |-----------------------------------------------------------------------------------|
     |                                         ci_hi                                     |
     |                                     .15231533                                     |
     +-----------------------------------------------------------------------------------+

     +-----------------------------------------------------------------------------------+
 15. |        method |        outcome |   estimate | std_error | n_selected |      ci_lo |
     |       DL (CV) |         Murder | -.20613219 | .05137252 |          0 | -.30682232 |
     |-----------------------------------------------------------------------------------|
     |                                         ci_hi                                     |
     |                                    -.10544206                                     |
     +-----------------------------------------------------------------------------------+

. export delimited "results_table2.csv", replace
(file results_table2.csv not found)
file results_table2.csv saved

. di _n "Wrote results_table2.csv"

Wrote results_table2.csv

. restore

. 
. 
. * ── 9b. Selection diagnostic CSV ─────────────────────────────────
. 
. preserve

. clear

. set obs 6
Number of observations (_N) was 0, now 6.

. gen str16 outcome = ""
(6 missing values generated)

. gen str14 method  = ""
(6 missing values generated)

. gen long  n_Iy = .
(6 missing values generated)

. gen long  n_Id = .
(6 missing values generated)

. gen long  n_union = .
(6 missing values generated)

. local i = 0

. forvalues oi = 1/3 {
  2.     local o : word `oi' of viol prop murd
  3.     if "`o'" == "viol" local olab "Violent crime"
  4.     if "`o'" == "prop" local olab "Property crime"
  5.     if "`o'" == "murd" local olab "Murder"
  6.     local ++i
  7.     qui replace outcome = "`olab'"        in `i'
  8.     qui replace method  = "DL (rigorous)" in `i'
  9.     qui replace n_Iy    = `DLR_Iy'[`oi',1] in `i'
 10.     qui replace n_Id    = `DLR_Id'[`oi',1] in `i'
 11.     qui replace n_union = `DLR_U'[`oi',1]  in `i'
 12.     local ++i
 13.     qui replace outcome = "`olab'"        in `i'
 14.     qui replace method  = "DL (CV)"       in `i'
 15.     qui replace n_Iy    = `DLC_Iy'[`oi',1] in `i'
 16.     qui replace n_Id    = `DLC_Id'[`oi',1] in `i'
 17.     qui replace n_union = `DLC_U'[`oi',1]  in `i'
 18. }

. list, sepby(outcome) abbreviate(20)

     +--------------------------------------------------------+
     |        outcome          method   n_Iy   n_Id   n_union |
     |--------------------------------------------------------|
  1. |  Violent crime   DL (rigorous)      0      8         8 |
  2. |  Violent crime         DL (CV)      0      0         0 |
     |--------------------------------------------------------|
  3. | Property crime   DL (rigorous)      3     14        17 |
  4. | Property crime         DL (CV)      0      0         0 |
     |--------------------------------------------------------|
  5. |         Murder   DL (rigorous)      1     12        13 |
  6. |         Murder         DL (CV)      0      0         0 |
     +--------------------------------------------------------+

. export delimited "selection_diagnostic.csv", replace
(file selection_diagnostic.csv not found)
file selection_diagnostic.csv saved

. di "Wrote selection_diagnostic.csv"
Wrote selection_diagnostic.csv

. restore

. 
. 
. * ── 10. Figures (4 dark-theme PNGs) ──────────────────────────────
. 
. di _n(2) "========================================"


========================================

. di "STEP 10 - FIGURES"
STEP 10 - FIGURES

. di "========================================"
========================================

. 
. * For figures we work in a small auxiliary dataset.
. preserve

. clear

. set obs 15
Number of observations (_N) was 0, now 15.

. gen byte   outcome_id = ceil(_n/5)   // 1..3

. gen byte   method_id  = mod(_n-1,5)+1 // 1..5

. 
. label define olab 1 "Violent crime" 2 "Property crime" 3 "Murder"

. label values outcome_id olab

. label define mlab 1 "First diff" 2 "OLS (full)" 3 "PSL" 4 "DL (rigorous)" 5 "DL (CV)"

. label values method_id mlab

. 
. gen double estimate = .
(15 missing values generated)

. gen double std_error = .
(15 missing values generated)

. 
. * Fill in from the saved matrices.
. forvalues i = 1/15 {
  2.     local oi = ceil(`i'/5)
  3.     local mi = mod(`i'-1,5)+1
  4.     if `mi' == 1 {
  5.         qui replace estimate  = `FD_b'[`oi',1]  in `i'
  6.         qui replace std_error = `FD_se'[`oi',1] in `i'
  7.     }
  8.     if `mi' == 2 {
  9.         qui replace estimate  = `OLS_b'[`oi',1]  in `i'
 10.         qui replace std_error = `OLS_se'[`oi',1] in `i'
 11.     }
 12.     if `mi' == 3 {
 13.         qui replace estimate  = `PSL_b'[`oi',1]  in `i'
 14.         qui replace std_error = `PSL_se'[`oi',1] in `i'
 15.     }
 16.     if `mi' == 4 {
 17.         qui replace estimate  = `DLR_b'[`oi',1]  in `i'
 18.         qui replace std_error = `DLR_se'[`oi',1] in `i'
 19.     }
 20.     if `mi' == 5 {
 21.         qui replace estimate  = `DLC_b'[`oi',1]  in `i'
 22.         qui replace std_error = `DLC_se'[`oi',1] in `i'
 23.     }
 24. }

. gen double ci_lo = estimate - 1.96*std_error

. gen double ci_hi = estimate + 1.96*std_error

. 
. * Plot positions: invert so "First diff" is at top.
. gen byte y = 6 - method_id

. 
. * === Figure 1: forest plot of all five estimates ===
. twoway ///
>     (rspike ci_lo ci_hi y if method_id==1, horizontal lcolor("$C_STEEL") lwidth(medthick)) ///
>     (scatter y estimate if method_id==1, mcolor("$C_STEEL") msymbol(O) msize(medlarge)) ///
>     (rspike ci_lo ci_hi y if method_id==2, horizontal lcolor("$C_TEXT") lwidth(medthick)) ///
>     (scatter y estimate if method_id==2, mcolor("$C_TEXT") msymbol(O) msize(medlarge)) ///
>     (rspike ci_lo ci_hi y if method_id==3, horizontal lcolor("$C_ORG") lwidth(medthick)) ///
>     (scatter y estimate if method_id==3, mcolor("$C_ORG") msymbol(O) msize(medlarge)) ///
>     (rspike ci_lo ci_hi y if method_id==4, horizontal lcolor("$C_TEAL") lwidth(medthick)) ///
>     (scatter y estimate if method_id==4, mcolor("$C_TEAL") msymbol(O) msize(medlarge)) ///
>     (rspike ci_lo ci_hi y if method_id==5, horizontal lcolor("$C_LTORG") lwidth(medthick)) ///
>     (scatter y estimate if method_id==5, mcolor("$C_LTORG") msymbol(O) msize(medlarge)) ///
>     , by(outcome_id, cols(3) ///
>           title("Treatment-effect estimates: abortion -> crime, 1985-1997", color("$C_TXTHI") 
> size(medsmall)) ///
>           subtitle("Each panel: 95% CIs from state-clustered SEs.", color("$C_TEXT") size(smal
> l)) ///
>           note("Replication of Table 2 in Fitzgerald et al. (2026). Dashed line at zero." , co
> lor("$C_TEXT") size(vsmall)) ///
>           graphregion(fcolor("$C_BG") lcolor("$C_BG")) ///
>           plotregion(fcolor("$C_BG") lcolor("$C_BG")) ///
>           ${DARKBG} legend(off) ///
>           imargin(small)) ///
>     subtitle(, fcolor("$C_BG") lcolor("$C_BG") size(small) color("$C_TXTHI")) ///
>     ylabel(1 "DL (CV)" 2 "DL (rigorous)" 3 "PSL" 4 "OLS (full)" 5 "First diff", ///
>            labcolor("$C_TEXT") angle(0) noticks nogrid) ///
>     xlabel(, labcolor("$C_TEXT")) ///
>     xtitle("alpha hat (effect of effective abortion rate)", color("$C_TEXT") size(small)) ///
>     ytitle("") ///
>     xline(0, lpattern(dash) lcolor("$C_TEXT")) ///
>     ${DARKBG} ///
>     name(fig_forest, replace)

. 
. graph export "stata_double_lasso_estimates.png", replace width(2400) height(1100)
file
    /Users/carlosmendez/Documents/GitHub/starter-academic-v501/content/post/stata_double_lasso
    > /stata_double_lasso_estimates.png saved as PNG format

. di "Wrote stata_double_lasso_estimates.png"
Wrote stata_double_lasso_estimates.png

. restore

. 
. 
. * === Figure 2: selection-count bar chart ===
. preserve

. clear

. set obs 12
Number of observations (_N) was 0, now 12.

. * 3 outcomes x 2 methods (rig, CV) x 2 metrics (Iy, Id) = 12 rows.
. gen byte outcome_id = ceil(_n/4)

. gen byte block = mod(_n-1,4) + 1  // 1=rig Iy, 2=rig Id, 3=cv Iy, 4=cv Id

. label define olab 1 "Violent crime" 2 "Property crime" 3 "Murder"

. label values outcome_id olab

. 
. gen str20 metric = ""
(12 missing values generated)

. gen str14 mtype  = ""
(12 missing values generated)

. gen long  count  = .
(12 missing values generated)

. forvalues i = 1/12 {
  2.     local oi = ceil(`i'/4)
  3.     local bl = mod(`i'-1,4) + 1
  4.     if `bl' == 1 {
  5.         qui replace metric = "|I_y|" in `i'
  6.         qui replace mtype  = "DL (rigorous)" in `i'
  7.         qui replace count  = `DLR_Iy'[`oi',1] in `i'
  8.     }
  9.     if `bl' == 2 {
 10.         qui replace metric = "|I_d|" in `i'
 11.         qui replace mtype  = "DL (rigorous)" in `i'
 12.         qui replace count  = `DLR_Id'[`oi',1] in `i'
 13.     }
 14.     if `bl' == 3 {
 15.         qui replace metric = "|I_y|" in `i'
 16.         qui replace mtype  = "DL (CV)" in `i'
 17.         qui replace count  = `DLC_Iy'[`oi',1] in `i'
 18.     }
 19.     if `bl' == 4 {
 20.         qui replace metric = "|I_d|" in `i'
 21.         qui replace mtype  = "DL (CV)" in `i'
 22.         qui replace count  = `DLC_Id'[`oi',1] in `i'
 23.     }
 24. }

. 
. * x positions: 1,2,5,6,9,10 etc. - grouped by metric, dodged by method.
. gen double xpos = .
(12 missing values generated)

. * rigorous Iy at x=1, CV Iy at x=2; rigorous Id at x=4, CV Id at x=5.
. qui replace xpos = 1 if metric == "|I_y|" & mtype == "DL (rigorous)"

. qui replace xpos = 2 if metric == "|I_y|" & mtype == "DL (CV)"

. qui replace xpos = 4 if metric == "|I_d|" & mtype == "DL (rigorous)"

. qui replace xpos = 5 if metric == "|I_d|" & mtype == "DL (CV)"

. 
. twoway ///
>     (bar count xpos if mtype == "DL (rigorous)", barwidth(0.85) fcolor("$C_TEAL") lcolor("$C_T
> EAL")) ///
>     (bar count xpos if mtype == "DL (CV)",        barwidth(0.85) fcolor("$C_LTORG") lcolor("$C
> _LTORG")) ///
>     (scatter count xpos, mcolor("$C_TXTHI") msymbol(none) mlabel(count) mlabcolor("$C_TXTHI") 
> mlabsize(small) mlabposition(12)) ///
>     , by(outcome_id, cols(3) ///
>           title("Variable selection: rigorous vs CV penalty", color("$C_TXTHI") size(medsmall)
> ) ///
>           subtitle("Out of 284 candidate controls per outcome.", color("$C_TEXT") size(small))
>  ///
>           note("|I_y| = controls selected when LASSOing y on X; |I_d| = controls selected when
>  LASSOing d on X.", color("$C_TEXT") size(vsmall)) ///
>           ${DARKBG}) ///
>     subtitle(, fcolor("$C_BG") lcolor("$C_BG") size(small) color("$C_TXTHI")) ///
>     xlabel(1.5 "|I_y|" 4.5 "|I_d|", noticks labcolor("$C_TEXT")) ///
>     ylabel(, labcolor("$C_TEXT")) ///
>     xtitle("Selection step", color("$C_TEXT") size(small)) ytitle("Number of controls", color(
> "$C_TEXT") size(small)) ///
>     legend(order(1 "DL (rigorous)" 2 "DL (CV)") region(fcolor("$C_BG") lcolor("$C_BG")) textco
> lor("$C_TEXT") size(small) cols(2)) ///
>     ${DARKBG} ///
>     name(fig_select, replace)
option textcolor() not allowed
r(198);

end of do-file
r(198);
