From e50ef04a13c66ea05dbb86541bae68a4adb2bfd6 Mon Sep 17 00:00:00 2001 From: Ashley Burdett <76621383+ANBurdett@users.noreply.github.com> Date: Wed, 1 Apr 2026 18:28:01 +0100 Subject: [PATCH 1/2] Updated regression do files to utilise programs --- .../compile/RegressionEstimates/master.do | 13 +- .../compile/RegressionEstimates/programs.do | 541 +++++- .../RegressionEstimates/reg_education.do | 878 +--------- .../RegressionEstimates/reg_fertility.do | 256 +-- .../reg_financial_distress.do | 244 +-- .../compile/RegressionEstimates/reg_health.do | 595 +------ .../RegressionEstimates/reg_health_mental.do | 1053 ++--------- .../reg_health_wellbeing.do | 1556 +++-------------- .../RegressionEstimates/reg_home_ownership.do | 256 +-- .../compile/RegressionEstimates/reg_income.do | 1328 +------------- .../reg_leave_parental_home.do | 251 +-- .../RegressionEstimates/reg_partnership.do | 490 +----- .../RegressionEstimates/reg_retirement.do | 481 +---- .../RegressionEstimates/reg_socialcare.do | 397 +---- .../compile/RegressionEstimates/reg_wages.do | 4 +- .../RegressionEstimates/variable_update.do | 18 +- 16 files changed, 1283 insertions(+), 7078 deletions(-) diff --git a/input/InitialPopulations/compile/RegressionEstimates/master.do b/input/InitialPopulations/compile/RegressionEstimates/master.do index e9d83a6cf..4d848f010 100644 --- a/input/InitialPopulations/compile/RegressionEstimates/master.do +++ b/input/InitialPopulations/compile/RegressionEstimates/master.do @@ -49,7 +49,7 @@ set matsize 1000 **************************************************************************************/ * Working directory -global dir_work "D:\Dasha\ESSEX\_SimPaths\_SimPaths_UK\regression_estimates" +global dir_work "/Users/ashleyburdett/Documents/SimPaths_UK/programs/regression_estimates" * Directory which contains do files global dir_do "${dir_work}/do" @@ -64,13 +64,13 @@ global dir_raw_results "${dir_work}/raw_results" global dir_results "${dir_work}/results" * Pooled dataset for 
estimates -global estimation_sample "D:\Dasha\ESSEX\_SimPaths\_SimPaths_UK\initial_populations\data\UKHLS_pooled_ipop.dta" +global estimation_sample "/Users/ashleyburdett/Library/CloudStorage/Box-Box/CeMPA shared area/_SimPaths/_SimPathsUK/input_processing/initial_populations/data/ukhls_pooled_ipop.dta" * Pooled dataset with predicted wages after Heckman -global estimation_sample2 "D:\Dasha\ESSEX\_SimPaths\_SimPaths_UK\initial_populations\data\UKHLS_pooled_ipop2.dta" +global estimation_sample2 "/Users/ashleyburdett/Library/CloudStorage/Box-Box/CeMPA shared area/_SimPaths/_SimPathsUK/input data preparation_Darias backup folder/regression_estimates/data/UKHLS_pooled_ipop2.dta" * Directory containing external input data -global dir_external_data "$dir_work/external_data" +global dir_external_data "/Users/ashleyburdett/Library/CloudStorage/Box-Box/CeMPA shared area/_SimPaths/_SimPathsUK/input data preparation_Darias backup folder/regression_estimates/external_data" * Directory containing results of comparison of various weights global weight_checks "${dir_work}/weight_checks" @@ -91,8 +91,8 @@ global first_sim_year "2010" global last_sim_year "2025" -* Globals used for all processes +* Globals used for all processes global weight "dwt" global regions "UKC UKD UKE UKF UKG UKH UKJ UKK UKL UKM UKN" //UKI is London (reference) @@ -219,13 +219,12 @@ global s3d_if_condition "provide_informal_care & Partnered & stm >= 15" // I * Finanicial distress and health processes * TO ADD - /******************************************************************************* * ESTIMATION FILES *******************************************************************************/ -/**/ + do "${dir_do}/reg_education.do" do "${dir_do}/reg_leave_parental_home.do" diff --git a/input/InitialPopulations/compile/RegressionEstimates/programs.do b/input/InitialPopulations/compile/RegressionEstimates/programs.do index f807f1dc9..e8f941fbd 100644 --- 
a/input/InitialPopulations/compile/RegressionEstimates/programs.do +++ b/input/InitialPopulations/compile/RegressionEstimates/programs.do @@ -8,7 +8,8 @@ mata set matastrict off end mata: -mata clear +mata clear +mata set matastrict off void trim_matrices() { V = st_matrix("V") @@ -52,29 +53,255 @@ void write_all_to_excel() { printf("Done\n") } -void extract_and_export_labels(string scalar sheet) { - nonzero_b_flag = st_matrix("nonzero_b_flag")' - stripe = st_matrixcolstripe("e(b)") - varnames = stripe[.,2] - varnames_no_bl = select(varnames, nonzero_b_flag :== 1) - labels_no_bl = usubinstr(varnames_no_bl, "1.", "", 1) - labels_no_bl = regexr(labels_no_bl, "^_cons", "Constant") - n_labs = rows(labels_no_bl) - for (i=1; i<=n_labs; i++) { - row = i + 1 - stata("quietly putexcel A" + strofreal(row) + " = " + char(34) + labels_no_bl[i] + char(34)) - } - for (j=1; j<=n_labs; j++) { - col_num = j + 2 - col_name = "" - n_temp = col_num - while (n_temp > 0) { - rem = mod(n_temp - 1, 26) - col_name = char(65 + rem) + col_name - n_temp = floor((n_temp - 1) / 26) - } - stata("quietly putexcel " + col_name + "1 = " + char(34) + labels_no_bl[j] + char(34)) - } +void extract_and_export_labels(string scalar sheet, real scalar max_n, real scalar is_ologit) { + nonzero_b_flag = st_matrix("nonzero_b_flag")' + + if (is_ologit) { + stripe = st_matrixcolstripe("b") + } + else { + stripe = st_matrixcolstripe("e(b)") + } + + varnames = stripe[.,2] + varnames_no_bl = select(varnames, nonzero_b_flag :== 1) + labels_no_bl = usubinstr(varnames_no_bl, "1.", "", 1) + labels_no_bl = regexr(labels_no_bl, "^_cons", "Constant") + // Handle lags: L.var -> var_L1 + labels_no_bl = /// + regexm(labels_no_bl, "^L\.") :* /// + (regexr(labels_no_bl, "^L\.", "") :+ "_L1") :+ (!regexm(labels_no_bl, "^L\.") :* labels_no_bl) + // Handle 1L.var + labels_no_bl = /// + regexm(labels_no_bl, "^1L\.") :* /// + (regexr(labels_no_bl, "^1L\.", "") :+ "_L1") :+ /// + (!regexm(labels_no_bl, "^1L\.") :* labels_no_bl) + 
// Handle second lag: L2.var -> var_L2 + labels_no_bl = /// + regexm(labels_no_bl, "^L2\.") :* /// + (regexr(labels_no_bl, "^L2\.", "") :+ "_L2") :+ /// + (!regexm(labels_no_bl, "^L2\.") :* labels_no_bl) + + // Truncate labels if max_n is specified (>0) + if (max_n > 0 & rows(labels_no_bl) > max_n) { + labels_no_bl = labels_no_bl[1..max_n, .] + } + + n_labs = rows(labels_no_bl) + for (i=1; i<=n_labs; i++) { + row = i + 1 + stata("quietly putexcel A" + strofreal(row) + " = " + char(34) + labels_no_bl[i] + char(34)) + } + for (j=1; j<=n_labs; j++) { + col_num = j + 2 + col_name = "" + n_temp = col_num + while (n_temp > 0) { + rem = mod(n_temp - 1, 26) + col_name = char(65 + rem) + col_name + n_temp = floor((n_temp - 1) / 26) + } + stata("quietly putexcel " + col_name + "1 = " + char(34) + labels_no_bl[j] + char(34)) + } +} + + +void write_diagonal_to_excel() { // Writes b_trimmed to column B (from row 2) and a diagonal-only copy of V_trimmed (off-diagonals forced to 0) from column C, one putexcel call per cell + V_trimmed = st_matrix("V_trimmed") + b_trimmed = st_matrix("b_trimmed") + n = cols(b_trimmed) // assumes b_trimmed is stored as a row vector - TODO confirm against trim_matrices() + + // Write coefficients + for (i=1; i<=n; i++) { + row = i + 1 + coef = b_trimmed[1,i] + stata("quietly putexcel B" + strofreal(row) + " = (" + strofreal(coef) + ")") // NOTE(review): strofreal() without a format argument uses %9.0g, so the written value is rounded - confirm that precision is acceptable + } + + printf("Writing diagonal V-C matrix\n") + + // Write full matrix structure with zeros in off-diagonal + for (i=1; i<=n; i++) { + for (j=1; j<=n; j++) { + row = i + 1 + col_num = j + 2 + col_name = "" + temp = col_num + while (temp > 0) { // convert the 1-based column number to Excel column letters (A..Z, AA, AB, ...) + rem = mod(temp - 1, 26) + col_name = char(65 + rem) + col_name + temp = floor((temp - 1) / 26) + } + // Only write actual variance on diagonal, zero elsewhere + if (i == j) val = V_trimmed[i,j] + else val = 0 + stata("quietly putexcel " + col_name + strofreal(row) + " = (" + strofreal(val) + ")") + } + if (mod(i, 5) == 0) printf(" Row %g/%g\n", i, n) + } + printf("Done (diagonal matrix)\n") +} + +void truncate_to_n(real scalar max_n) { // Keeps only the first max_n entries of b_trimmed / V_trimmed and writes the shortened matrices back to Stata + b_trimmed = st_matrix("b_trimmed") + V_trimmed = st_matrix("V_trimmed") + nonzero_b_flag = st_matrix("nonzero_b_flag") + n = cols(b_trimmed) // assumes b_trimmed is stored as a row vector - TODO confirm against trim_matrices() + + if (n > max_n) { + b_trimmed = b_trimmed[1,
1..max_n] + V_trimmed = V_trimmed[1..max_n, 1..max_n] + + // Update nonzero_b_flag to only reflect first max_n kept variables + kept = 0 + for (i=1; i<=length(nonzero_b_flag); i++) { // length() + single subscript work for row or column vectors; the flag is stored as a row vector elsewhere in this file, so rows() would be 1 and the loop would stop after one element + if (nonzero_b_flag[i] == 1) kept = kept + 1 + if (kept > max_n) nonzero_b_flag[i] = 0 + } + + st_matrix("b_trimmed", b_trimmed) + st_matrix("V_trimmed", V_trimmed) + st_matrix("nonzero_b_flag", nonzero_b_flag) + printf("Truncated to first %g non-zero estimates\n", max_n) + } + else { + printf("Fewer than %g non-zero estimates (%g found), no truncation needed\n", max_n, n) + } +} + +void reorder_cuts_to_end() { // Moves the columns of Stata matrices b and V that belong to equation "/" (cut points) after all other columns and renames cutK to CutK in the stripes + b = st_matrix("b") + V = st_matrix("V") + stripe = st_matrixcolstripe("e(b)") + + // Identify cut point columns + is_cut = (stripe[.,1] :== "/") + not_cut = (is_cut :== 0) + is_cut_row = is_cut' + not_cut_row = not_cut' + + // Reorder b and V + b_reordered = select(b, not_cut_row), select(b, is_cut_row) + V_temp = select(V, not_cut_row), select(V, is_cut_row) + V_reordered = select(V_temp', not_cut_row), select(V_temp', is_cut_row) + V_reordered = V_reordered' + + // Reorder stripe and rename cuts before writing back + stripe_reordered = (select(stripe, not_cut) \ select(stripe, is_cut)) + for (i=1; i<=rows(stripe_reordered); i++) { + if (stripe_reordered[i,1] == "/" & regexm(stripe_reordered[i,2], "^cut([0-9]+)")) { + stripe_reordered[i,2] = "Cut" + regexs(1) + stripe_reordered[i,1] = "" + } + } + + // Write back with correct stripe + st_matrix("b", b_reordered) + st_matrix("V", V_reordered) + st_matrixcolstripe("b", stripe_reordered) + st_matrixcolstripe("V", stripe_reordered) + st_matrixrowstripe("V", stripe_reordered) + +} + + +void build_gologit_structure(real scalar n_outcomes) { + b = st_matrix("b") + V = st_matrix("V") + + // Step 1: Remove zero coefficients (baseline categories) + keep = (b :!= 0) + nonzero_b = select(b, keep) + V_trimmed = select(V, keep) + V_trimmed = select(V_trimmed', keep)' + st_matrix("nonzero_b", nonzero_b) + st_matrix("nonzero_b_flag",
keep) + + // Step 2: Detect repeated coefficients (proportional odds vars) + n = cols(nonzero_b) + n_per = n / (n_outcomes - 1) // non-zero coefficients per equation; assumes the n terms split evenly over n_outcomes-1 equations - TODO confirm + repetition_flag = J(n, 1, 0) + tol = 1e-8 // a coefficient within tol of any other coefficient is flagged as repeated + for (i=1; i<=n; i++) { + found = 0 + for (j=1; j<=n; j++) { + if (found == 0 & i != j & abs(nonzero_b[1,i] - nonzero_b[1,j]) < tol) { + repetition_flag[i] = 1 + found = 1 + } + } + } + unique_flag = 1 :- repetition_flag + st_matrix("repetition_flag", repetition_flag') + st_matrix("unique_flag", unique_flag') + + + // Step 3: Build structure vector (first n_per entries always kept; later entries kept only when not flagged as repeated) + structure_a = J(1, n_per, 1) + structure_b = unique_flag[n_per+1::n]' // NOTE(review): when n_outcomes == 2, n_per equals n and this subscript runs past the vector - confirm callers always pass 3 or more + structure = structure_a, structure_b + st_matrix("structure", structure) + + // Step 4: Apply structure to b + b_structure = structure :* nonzero_b + keep2 = (b_structure :!= 0) + nonzero_b_structure = select(b_structure, keep2) + st_matrix("nonzero_b_structure", nonzero_b_structure) + + // Step 5: Apply structure to V + square_structure_a = J(n, 1, 1) * structure // n x n matrix whose every row is the structure vector + square_structure_b = square_structure_a' + square_structure = square_structure_a :* square_structure_b // 1 only where both the row and the column are kept + var_structure = square_structure :* V_trimmed + row_keep = (rowsum(abs(var_structure)) :!= 0) + col_keep = (colsum(abs(var_structure)) :!= 0) + nonzero_var_structure = select(select(var_structure, col_keep), row_keep) + st_matrix("nonzero_var_structure", nonzero_var_structure) + + printf("Gologit structure built: %g unique coefficients\n", cols(nonzero_b_structure)) +} + +void export_labels_gologit(string scalar sheet) { // Writes row labels (column A) and column labels (row 1, from column C) for the coefficients kept by build_gologit_structure(); names come from the column stripe of e(b) + nonzero_b_flag = st_matrix("nonzero_b_flag")' + unique_flag = st_matrix("unique_flag")' + structure = st_matrix("structure")' + stripe = st_matrixcolstripe("e(b)") + + catnames = stripe[.,1] + varnames = stripe[.,2] + varnames_no_bl = select(varnames, nonzero_b_flag :== 1) + catnames_no_bl = select(catnames, nonzero_b_flag :== 1) + + // Clean variable names + labels_no_bl = usubinstr(varnames_no_bl, "1.", "", 1) + labels_no_bl = regexr(labels_no_bl, "^_cons", "Constant") + labels_no_bl =
(regexm(labels_no_bl, "^L\.") :* + (regexr(labels_no_bl, "^L\.", "") :+ "_L1")) :+ + (!regexm(labels_no_bl, "^L\.") :* labels_no_bl) + + // Add category suffix only for non-proportional odds vars (unique_flag == 1) + labels_no_bl = labels_no_bl :+ + (("_" :+ catnames_no_bl) :* (unique_flag[1::rows(labels_no_bl)] :== 1)) + + // Filter by structure + final_labels = select(labels_no_bl, structure[1::rows(labels_no_bl)] :== 1) + + n_labs = rows(final_labels) + for (i=1; i<=n_labs; i++) { + row = i + 1 + stata("quietly putexcel A" + strofreal(row) + " = " + char(34) + final_labels[i] + char(34)) + } + for (j=1; j<=n_labs; j++) { + col_num = j + 2 + col_name = "" + n_temp = col_num + while (n_temp > 0) { + rem = mod(n_temp - 1, 26) + col_name = char(65 + rem) + col_name + n_temp = floor((n_temp - 1) / 26) + } + stata("quietly putexcel " + col_name + "1 = " + char(34) + final_labels[j] + char(34)) + } } end @@ -114,10 +341,10 @@ end capture program drop export_labels_to_excel program define export_labels_to_excel - syntax, sheet(string) + syntax, domain(string) sheet(string) * Set up Excel file - putexcel set "$dir_results/reg_socialcare", sheet("`sheet'") modify + putexcel set "$dir_results/reg_`domain'", sheet("`sheet'") modify * Vertical labels forvalues i = 1/`n_labels' { @@ -155,9 +382,9 @@ end capture program drop export_gof_probit program define export_gof_probit - syntax, row(integer) label(string) + syntax, domain(string) row(integer) label(string) - putexcel set "$dir_results/reg_socialcare", sheet("Gof") modify + putexcel set "$dir_results/reg_`domain'", sheet("Gof") modify local row1 = `row' local row2 = `row' + 1 @@ -180,9 +407,9 @@ end capture program drop export_gof_ols program define export_gof_ols - syntax, row(integer) label(string) + syntax, domain(string) row(integer) label(string) - putexcel set "$dir_results/reg_socialcare", sheet("Gof") modify + putexcel set "$dir_results/reg_`domain'", sheet("Gof") modify local row1 = `row' local row2 = `row' + 1 
@@ -201,13 +428,13 @@ end capture program drop save_raw_results program define save_raw_results - syntax, process(string) title(string) [ifcond(string)] + syntax, domain(string) process(string) title(string) [ifcond(string)] * Save to Excel matrix results = r(table) matrix results = results[1..6,1...]' - putexcel set "$dir_raw_results/social_care/socialcare", /// + putexcel set "$dir_raw_results/`domain'/`domain'", /// sheet("Process `process'") replace putexcel A3 = matrix(results), names nformat(number_d2) putexcel J4 = matrix(e(V)) @@ -219,16 +446,16 @@ program define save_raw_results local note `"addnote("Note: Regression if condition = (`ifcond')")"' } - * Check if probit/logit or OLS - if "`e(cmd)'" == "probit" | "`e(cmd)'" == "logit" { + * Check if probit/logit/ologit or OLS + if "`e(cmd)'" == "probit" | "`e(cmd)'" == "logit" | "`e(cmd)'" == "ologit" | "`e(cmd)'" == "gologit2" { outreg2 stats(coef se pval) using /// - "$dir_raw_results/social_care/`process'.doc", replace /// + "$dir_raw_results/`domain'/`process'.doc", replace /// title("`title'") ctitle(Model) label side dec(2) noparen /// addstat(R2, e(r2_p), Chi2, e(chi2), Log-likelihood, e(ll)) `note' } else { outreg2 stats(coef se pval) using /// - "$dir_raw_results/social_care/`process'.doc", replace /// + "$dir_raw_results/`domain'/`process'.doc", replace /// title("`title'") ctitle(Model) label side dec(2) noparen /// addstat(R2, e(r2)) `note' } @@ -241,31 +468,57 @@ end capture program drop export_results_to_excel program define export_results_to_excel - syntax, sheet(string) [probit] + syntax, domain(string) sheet(string) [probit ologit gformula maxestimates(integer 11)] * Store estimates matrix b = e(b) matrix V = e(V) + * For ologit, reorder cuts to end before trimming + if "`ologit'" == "ologit" { + mata: reorder_cuts_to_end() + } + * Trim zero coefficients mata: trim_matrices() - * Check matrix stability - check_matrix_stability + * For gformula, further truncate to first maxestimates non-zero 
estimates + if "`gformula'" == "gformula" { + mata: truncate_to_n(`maxestimates') + } + + * Check matrix stability (skip for gformula) + if "`gformula'" != "gformula" { + check_matrix_stability + } * Export to Excel - use modify mode (file already created in setup) - putexcel set "$dir_results/reg_socialcare", sheet("`sheet'") modify + putexcel set "$dir_results/reg_`domain'", sheet("`sheet'") modify putexcel A1 = "REGRESSOR" putexcel B1 = "COEFFICIENT" * Write coefficients cell-by-cell - mata: write_all_to_excel() + if "`gformula'" == "gformula" { + putexcel C1 = "VARIANCE" + mata: write_diagonal_to_excel() + } + else { + mata: write_all_to_excel() + } * Extract and export labels - mata: extract_and_export_labels("`sheet'") + if "`ologit'" == "ologit" { + mata: extract_and_export_labels("`sheet'", 0, 1) + } + else if "`gformula'" == "gformula" { + mata: extract_and_export_labels("`sheet'", `maxestimates', 0) + } + else { + mata: extract_and_export_labels("`sheet'", 0, 0) + } * Store model statistics - if "`probit'" == "probit" { + if "`probit'" == "probit" | "`ologit'" == "ologit" { scalar r2_p = e(r2_p) scalar chi2 = e(chi2) scalar ll = e(ll) @@ -278,15 +531,21 @@ program define export_results_to_excel end +/*============================================================================== + COMPLETE WORKFLOW PROGRAMS +==============================================================================*/ + * Complete workflow: save sample, export results, and clean up capture program drop process_regression program define process_regression - syntax, process(string) sheet(string) title(string) gofrow(integer) /// - goflabel(string) [ifcond(string) probit] + syntax, domain(string) process(string) sheet(string) title(string) /// + gofrow(integer) goflabel(string) [ifcond(string) /// + probit gformula maxestimates(integer 11)] * Save raw results - save_raw_results, process("`process'") title("`title'") ifcond("`ifcond'") + save_raw_results, domain("`domain'") 
process("`process'") /// + title("`title'") ifcond("`ifcond'") * Save sample for validation gen in_sample = e(sample) @@ -294,13 +553,18 @@ program define process_regression save "$dir_validation_data/`process'_sample", replace * Export results to Excel - if "`probit'" == "probit" { - export_results_to_excel, sheet("`sheet'") probit - export_gof_probit, row(`gofrow') label("`goflabel'") + if "`gformula'" == "gformula" { + export_results_to_excel, domain("`domain'") sheet("`sheet'") /// + gformula maxestimates(`maxestimates') + export_gof_ols, domain("`domain'") row(`gofrow') label("`goflabel'") + } + else if "`probit'" == "probit" { + export_results_to_excel, domain("`domain'") sheet("`sheet'") probit + export_gof_probit, domain("`domain'") row(`gofrow') label("`goflabel'") } else { - export_results_to_excel, sheet("`sheet'") - export_gof_ols, row(`gofrow') label("`goflabel'") + export_results_to_excel, domain("`domain'") sheet("`sheet'") + export_gof_ols, domain("`domain'") row(`gofrow') label("`goflabel'") } * Clean up @@ -310,44 +574,139 @@ program define process_regression end -* Specialized workflow for multinomial logit models -capture program drop process_mlogit -program define process_mlogit - - syntax, process(string) sheet(string) title(string) gofrow(integer) /// - goflabel(string) outcomes(integer) [ifcond(string)] - - * Save raw results (skip outreg2 for mlogit - it has issues) - matrix results = r(table) - matrix results = results[1..6,1...]' - putexcel set "$dir_raw_results/social_care/socialcare", /// - sheet("Process `process'") replace - putexcel A3 = matrix(results), names nformat(number_d2) - putexcel J4 = matrix(e(V)) - - * Save sample for validation - gen in_sample = e(sample) - - * Generate predictions (number depends on outcomes) - if `outcomes' == 3 { - predict p1 p2 p3 - } - else if `outcomes' == 4 { - predict p1 p2 p3 p4 - } - else if `outcomes' == 5 { - predict p1 p2 p3 p4 p5 - } - - save "$dir_validation_data/`process'_sample", replace 
- - * Export results to Excel - export_results_to_excel, sheet("`sheet'") probit - export_gof_probit, row(`gofrow') label("`goflabel'") - - * Clean up - drop in_sample p* - scalar drop _all - matrix drop _all - + +* Specialized workflow for ordered logit models +capture program drop process_ologit +program define process_ologit + + syntax, domain(string) process(string) sheet(string) title(string) /// + gofrow(integer) goflabel(string) [ifcond(string)] + + * Save raw results + save_raw_results, domain("`domain'") process("`process'") /// + title("`title'") ifcond("`ifcond'") + + * Save sample for validation + gen in_sample = e(sample) + predict p + save "$dir_validation_data/`process'_sample", replace + + * reorder_cuts_to_end removed - handled inside export_results_to_excel + + * Export results to Excel + export_results_to_excel, domain("`domain'") sheet("`sheet'") ologit + + * Export GoF + export_gof_probit, domain("`domain'") row(`gofrow') label("`goflabel'") + + * Clean up + drop in_sample p + scalar drop _all + matrix drop _all + end + + +* Specialized workflow for generalized ordered logit models +capture program drop process_gologit +program define process_gologit + + syntax, domain(string) process(string) sheet(string) title(string) /// + gofrow(integer) goflabel(string) outcomes(integer) [ifcond(string)] + * Note: outcomes() = total number of categories INCLUDING the base category + + * Save raw results + matrix results = r(table) + matrix results = results[1..6,1...]' + putexcel set "$dir_raw_results/`domain'/`domain'", /// + sheet("Process `process'") modify + putexcel A3 = matrix(results), names nformat(number_d2) + putexcel J4 = matrix(e(V)) + + * Save to Word + capture which outreg2 + if _rc == 0 { + if "`ifcond'" != "" { + local note `"addnote("Note: Regression if condition = (`ifcond')")"' + } + outreg2 stats(coef se pval) using /// + "$dir_raw_results/`domain'/`process'.doc", replace /// + title("`title'") ctitle(Education level) label side dec(2) 
noparen /// + addstat(R2, e(r2_p), Chi2, e(chi2), Log-likelihood, e(ll)) `note' + } + + * Save sample and predictions + gen in_sample = e(sample) + local plist "" + forvalues k = 1/`outcomes' { + local plist "`plist' p`k'" + } + predict `plist' + save "$dir_validation_data/`process'_sample", replace + + * Store model summary statistics + scalar r2_p = e(r2_p) + scalar chi2 = e(chi2) + scalar ll = e(ll) + scalar N_sample = e(N) + + * Store estimates in matrices + matrix b = e(b) + matrix V = e(V) + + * Raw output + putexcel set "$dir_results/reg_`domain'", sheet("`sheet'_raw") modify + putexcel A1 = matrix(b'), names nformat(number_d2) + putexcel A1 = "CATEGORY" + putexcel B1 = "REGRESSOR" + putexcel C1 = "COEFFICIENT" + + * Build gologit structure + mata: build_gologit_structure(`outcomes') + + * Eigenvalue stability check + matrix symeigen X lambda = nonzero_var_structure + scalar max_eig = lambda[1,1] + scalar min_ratio = lambda[1, colsof(lambda)] / max_eig + if max_eig < 1.0e-12 { + display as error "CRITICAL ERROR: Variance-covariance matrix is near singular." + exit 999 + } + if min_ratio < 1.0e-12 { + display as error "Matrix is ill-conditioned. Min/Max ratio: " min_ratio + exit 506 + } + display "VCV stability check passed. 
Max eigenvalue: " max_eig + display "Min/Max ratio: " min_ratio + + * Export to Excel + putexcel set "$dir_results/reg_`domain'", sheet("`sheet'") modify + putexcel A1 = "REGRESSOR" + putexcel B1 = "COEFFICIENT" + putexcel B2 = matrix(nonzero_b_structure') + putexcel C2 = matrix(nonzero_var_structure) + + * Extract and export labels + mata: export_labels_gologit("`sheet'") + + * Goodness of fit + putexcel set "$dir_results/reg_`domain'", sheet("Gof") modify + local row2 = `gofrow' + 1 + local row3 = `gofrow' + 2 + putexcel A`gofrow' = "`goflabel'", bold + putexcel A`row2' = "Pseudo R-squared" + putexcel B`row2' = r2_p + putexcel A`row3' = "N" + putexcel B`row3' = N_sample + putexcel E`row2' = "Chi^2" + putexcel F`row2' = chi2 + putexcel E`row3' = "Log likelihood" + putexcel F`row3' = ll + + * Clean up + drop in_sample `plist' + scalar drop _all + matrix drop _all + +end + diff --git a/input/InitialPopulations/compile/RegressionEstimates/reg_education.do b/input/InitialPopulations/compile/RegressionEstimates/reg_education.do index 43a17efbb..8a6024af1 100644 --- a/input/InitialPopulations/compile/RegressionEstimates/reg_education.do +++ b/input/InitialPopulations/compile/RegressionEstimates/reg_education.do @@ -1,14 +1,15 @@ -****************************************************************************************** +/******************************************************************************* * PROJECT: SimPaths UK * SECTION: Education * OBJECT: Final Probit & Generalised Logit Models - Weighted -* AUTHORS: Patryk Bronka, Daria Popova, Justin van de Ven, Aleksandra Kolndrekaj -* LAST UPDATE: 18 Feb 2026 AK +* AUTHORS: Patryk Bronka, Daria Popova, Justin van de Ven, +* Aleksandra Kolndrekaj +* LAST UPDATE: 26 Mar 2026 (AB) * COUNTRY: UK * * NOTES: * -***************************************************************************************** +*******************************************************************************/ clear all set more off @@ -17,45 +18,46 @@ 
set type double //set maxvar 120000 set maxvar 30000 -******************************************************************* + +/********************************* SET LOG FILE *******************************/ cap log close log using "${dir_log}/reg_education.log", replace -******************************************************************* -* Set Excel file -* Info sheet +/******************************* SET EXCEL FILE *******************************/ + putexcel set "$dir_results/reg_education", sheet("Info") replace -putexcel A1 = "Description:" +putexcel A1 = "Description:", bold putexcel B1 = "Model parameters governing projection of education status" -putexcel A2 = "Authors: Patryk Bronka, Justin van de Ven, Daria Popova, Aleksandra Kolndrekaj" -putexcel A3 = "Last edit: 18 Feb 2026 AK" +putexcel A2 = "Authors:" +putexcel B2 = "Patryk Bronka, Justin van de Ven, Daria Popova, Aleksandra Kolndrekaj" +putexcel A3 = "Last edit:" +putexcel B3 = "26 Mar 2026 AB" -putexcel A4 = "Process:", bold -putexcel B4 = "Description:", bold +putexcel A5 = "Process:", bold +putexcel B5 = "Description:", bold -putexcel A5 = "E1a" -putexcel B5 = "Probit regression estimates of exiting education" +putexcel A6 = "E1a" +putexcel B6 = "Prob. remain in education" -putexcel A6 = "E1b" -putexcel B6 = "Probit regression estimates of returning to education" +putexcel A7 = "E1b" +putexcel B7 = "Prob. return to education" -putexcel A7 = "E2" -putexcel B7 = "Generalized ordered logit regression estimates of education attainment - individuals aged 16+ exiting education." +putexcel A8 = "E2" +putexcel B8 = "Educational attainment when leave education" -putexcel A8 = "E2_raw" -putexcel B8 = "Raw generalized ordered logit regression estimates of education attainment - individuals aged 16+ exiting education. Useful for the 'Gologit predictor' file."
+putexcel A9 = "E2_raw" +putexcel B9 = "Raw attainment results" -putexcel A10 = "Notes:", bold -putexcel B10 = "Estimation sample: UK_ipop.dta with grossing up weight dwt" -putexcel B11 = "Conditions for processes are defined as globals in master.do" -//putexcel B12 = "E1a: Compared to the previous version, where age and age squared were used, age is now centered (at age 23) and its effect is allowed to change after age 18." +putexcel A11 = "Notes:", bold +putexcel B11 = "Estimation sample: UK_ipop.dta with grossing up weight dwt" +putexcel B12 = "Conditions for processes are defined as globals in master.do" +//putexcel B13 = "E1a: Compared to the previous version, where age and age squared were used, age is now centered (at age 23) and its effect is allowed to change after age 18." putexcel set "$dir_results/reg_education", sheet("Gof") modify putexcel A1 = "Goodness of fit", bold - /********************************* PREPARE DATA *******************************/ use "${estimation_sample}", clear @@ -70,6 +72,10 @@ do "${dir_do}/variable_update.do" /********************************** ESTIMATION ********************************/ +* Run Stata programs to produce Excel file +do "${dir_do}/programs.do" + + /****************** E1a: PROBABILITY OF REMAINING IN EDUCATION ****************/ display "${e1a_if_condition}" @@ -77,813 +83,45 @@ probit Dst i.Dgn Dag Dag_sq /*Dag_c Dag_c_sq Dag_post18_sq*/ li.Ded /// li.Dehmf_c3_Medium li.Dehmf_c3_Low /// li.Ydses_c5_Q2 li.Ydses_c5_Q3 li.Ydses_c5_Q4 li.Ydses_c5_Q5 /// $regions Year_transformed Y2020 Y2021 $ethnicity /// - if ${e1a_if_condition} /// - [pw=dwt], vce(robust) - - -* Save raw results -matrix results = r(table) -matrix results = results[1..6,1...]' - -putexcel set "$dir_raw_results/education/education", sheet("Process E1a") /// - replace -putexcel A3 = matrix(results), names nformat(number_d2) -putexcel J4 = matrix(e(V)) - -outreg2 stats(coef se pval) using "$dir_raw_results/education/E1a.doc", /// - replace /// 
-title("Process E1a: Probability Remaining In Education") /// - ctitle(Continuing student) label side dec(2) noparen /// - addstat(R2, e(r2_p), Chi2, e(chi2), Log-likelihood, e(ll)) /// - addnote(`"Note: Regression if condition = (${e1a_if_condition})"') - -* Save sample inclusion indicator and predicted probabilities -gen in_sample = e(sample) -predict p - -* Save sample estimate validation -save "$dir_validation_data/E1a_sample", replace - -* Store model summary statistics -scalar r2_p = e(r2_p) -scalar N_sample = e(N) -scalar chi2 = e(chi2) -scalar ll = e(ll) - - -* Save estimates for use in SimPaths - -* Store estimates -matrix b = e(b) -matrix V = e(V) - -* Eliminate rows and columns containing zeros (baseline cats) -mata: - // Call matrices into mata - V = st_matrix("V") - b = st_matrix("b") - - // Find which coefficients are nonzero - keep = (b :!= 0) - - // Eliminate zeros - b_trimmed = select(b, keep) - V_trimmed = select(V, keep) - V_trimmed = select(V_trimmed', keep)' - - // Inspection - b_trimmed - V_trimmed - - // Return to Stata - st_matrix("b_trimmed", b_trimmed') - st_matrix("V_trimmed", V_trimmed) - st_matrix("nonzero_b_flag", keep) -end - -* Eigenvalue tests for var-cov invertablility in SimPaths -matrix symeigen X lambda = V_trimmed - -scalar max_eig = lambda[1,1] - -scalar min_ratio = lambda[1, colsof(lambda)] / max_eig - -* Outcome of max eigenvalue test -if max_eig < 1.0e-12 { - - display as error "CRITICAL ERROR: Maximum eigenvalue is too small (`max_eig')." - display as error "The Variance-Covariance matrix is likely singular." - exit 999 - -} - -display "Stability Check Passed: Max Eigenvalue is " max_eig - -* Outcome of eigenvalue ratio test -if min_ratio < 1.0e-12 { - - display as error "Matrix is ill-conditioned. Min/Max ratio: " min_ratio - exit 506 - -} - -display "Stability Check Passed. 
Min/Max ratio: " min_ratio - -* Export into Excel -putexcel set "$dir_results/reg_education", sheet("E1a") modify -putexcel B2 = matrix(b_trimmed) -putexcel C2 = matrix(V_trimmed) - - -* Labels -preserve -putexcel set "$dir_results/reg_education", sheet("E1a") modify - -putexcel A1 = "REGRESSOR" -putexcel B1 = "COEFFICIENT" - - -* Use Mata to extract nice labels from colstripe of e(b) - -local dir_results "$dir_results" -cap erase "$dir_results/temp_labels.txt" - -mata: - // -------------------------------------------------- - // Import objects from Stata - // -------------------------------------------------- - nonzero_b_flag = st_matrix("nonzero_b_flag") - stripe = st_matrixcolstripe("e(b)") - - // Ensure column vector - nonzero_b_flag = nonzero_b_flag' - - // -------------------------------------------------- - // Extract variable names - // -------------------------------------------------- - varnames = stripe[.,2] - - // Keep non-baseline coefficients - varnames_no_bl = select(varnames, nonzero_b_flag :== 1) - - // -------------------------------------------------- - // Clean labels - // -------------------------------------------------- - labels_no_bl = usubinstr(varnames_no_bl, "1.", "", 1) - labels_no_bl = regexr(labels_no_bl, "^_cons", "Constant") - - // Handle lags: L.var -> var_L1 - labels_no_bl = /// - regexm(labels_no_bl, "^L\.") :* /// - (regexr(labels_no_bl, "^L\.", "") :+ "_L1") :+ /// - (!regexm(labels_no_bl, "^L\.") :* labels_no_bl) - - // Handle 1L.var - labels_no_bl = /// - regexm(labels_no_bl, "^1L\.") :* /// - (regexr(labels_no_bl, "^1L\.", "") :+ "_L1") :+ /// - (!regexm(labels_no_bl, "^1L\.") :* labels_no_bl) - - // -------------------------------------------------- - // Add header - // -------------------------------------------------- - labels_out = "v1" \ labels_no_bl - - // -------------------------------------------------- - // Write to temp file - // -------------------------------------------------- - outfile = st_local("dir_results") 
+ "/temp_labels.txt" - fh = fopen(outfile, "w") - for (i=1; i<=rows(labels_out); i++) { - fput(fh, labels_out[i]) - } - fclose(fh) -end - - - * Import cleaned labels into Stata - import delimited "$dir_results/temp_labels.txt", clear varnames(1) /// - encoding(utf8) - gen n = _n - - * Export labels to Excel - putexcel set "$dir_results/reg_education", sheet("E1a") modify + if ${e1a_if_condition} [pw=${weight}], vce(robust) - * Vertical labels - summarize n, meanonly - local N = r(max)+1 - forvalue i = 2/`N' { +process_regression, domain("education") process("E1a") sheet("E1a") /// + title("Process E1a: Prob. remain in education") /// + gofrow(3) goflabel("E1a - Remain in education") /// + ifcond("${e1a_if_condition}") probit - local j = `i' - 1 - putexcel A`i' = v1[`j'] - - } - - * Horizontal labels - summarize n, meanonly - local N = r(max) + 1 // Adjusted since we're working across columns - - forvalues j = 1/`N' { - - local n = `j'+2 // Shift by 2 to start from column C - local col "" - - while `n' > 0 { - local rem = mod(`n' - 1, 26) - local col = char(65 + `rem') + "`col'" - local n = floor((`n' - 1)/26) - } - - putexcel `col'1 = v1[`j'] - } - - * Clean up - cap erase "$dir_results/temp_labels.txt" - -restore - -* Export model fit statistics -putexcel set "$dir_results/reg_education", sheet("Gof") modify - -putexcel A3 = "E1a - Leaving education", bold - -putexcel A5 = "Pseudo R-squared" -putexcel B5 = r2_p -putexcel A6 = "N" -putexcel B6 = N_sample -putexcel E5 = "Chi^2" -putexcel F5 = chi2 -putexcel E6 = "Log likelihood" -putexcel F6 = ll - -* Clean up -drop in_sample p -scalar drop _all -matrix drop _all - - /****************** E1b: PROBABILITY OF RETURNING TO EDUCATION ****************/ display "${e1b_if_condition}" probit der i.Dgn Dag Dag_sq li.Dcpst_Partnered /// -li.Deh_c4_High li.Deh_c4_Low /// -li.Dehmf_c3_Medium li.Dehmf_c3_Low /// -li.Les_c3_NotEmployed li.Les_c3_Employed /// -l.Dnc l.Dnc02 /// -$regions Year_transformed Y2020 Y2021 $ethnicity /// 
-if ${e1b_if_condition} /// - [pw=dwt], vce(robust) - - -* Save raw results -matrix results = r(table) -matrix results = results[1..6,1...]' - -putexcel set "$dir_raw_results/education/education", sheet("Process E1b") /// - modify -putexcel A3 = matrix(results), names nformat(number_d2) -putexcel J4 = matrix(e(V)) - -outreg2 stats(coef se pval) using "$dir_raw_results/education/E1b.doc", /// - replace /// -title("Process E1b: Probability Returning To Education") /// - ctitle(Returning student) label side dec(2) noparen /// - addstat(R2, e(r2_p), Chi2, e(chi2), Log-likelihood, e(ll)) /// - addnote(`"Note: Regression if condition = (${e1b_if_condition})"') - - -* Save sample inclusion indicator and predicted probabilities -gen in_sample = e(sample) -predict p - -* Save sample for later use (internal validation) -save "$dir_validation_data/E1b_sample", replace - -* Store model summary statistics -scalar r2_p = e(r2_p) -scalar N_sample = e(N) -scalar chi2 = e(chi2) -scalar ll = e(ll) - -* Prepare to store results in Excel - -* Eliminate rows and columns containing zeros (baseline cats) -matrix b = e(b) -matrix V = e(V) - - -mata: - V = st_matrix("V") - b = st_matrix("b") - - // Find which coefficients are nonzero - keep = (b :!= 0) - - // Eliminate zeros - b_trimmed = select(b, keep) - V_trimmed = select(V, keep) - V_trimmed = select(V_trimmed', keep)' - - b_trimmed - V_trimmed - - // Return to Stata - st_matrix("b_trimmed", b_trimmed') - st_matrix("V_trimmed", V_trimmed) - st_matrix("nonzero_b_flag", keep) -end - - -* Eigenvalue tests for var-cov invertablility in SimPaths -matrix symeigen X lambda = V_trimmed - -scalar max_eig = lambda[1,1] - -scalar min_ratio = lambda[1, colsof(lambda)] / max_eig - -* Outcome of max eigenvalue test -if max_eig < 1.0e-12 { - - display as error "CRITICAL ERROR: Maximum eigenvalue is too small (`max_eig')." - display as error "The Variance-Covariance matrix is likely singular." 
- exit 999 - -} - -display "Stability Check Passed: Max Eigenvalue is " max_eig - -* Outcome of eigenvalue ratio test -if min_ratio < 1.0e-12 { - - display as error "Matrix is ill-conditioned. Min/Max ratio: " min_ratio - exit 506 - -} - -display "Stability Check Passed. Min/Max ratio: " min_ratio - -* Export into Excel -putexcel set "$dir_results/reg_education", sheet("E1b") modify -putexcel B2 = matrix(b_trimmed) -putexcel C2 = matrix(V_trimmed) - - -* Labels -preserve -putexcel set "$dir_results/reg_education", sheet("E1b") modify - -putexcel A1 = "REGRESSOR" -putexcel B1 = "COEFFICIENT" - - -* Use Mata to extract nice labels from colstripe of e(b) (replacement for Stata 14) - -local dir_results "$dir_results" -cap erase "$dir_results/temp_labels.txt" - -mata: - // -------------------------------------------------- - // Import objects from Stata - // -------------------------------------------------- - nonzero_b_flag = st_matrix("nonzero_b_flag") - stripe = st_matrixcolstripe("e(b)") - - // Ensure column vector - nonzero_b_flag = nonzero_b_flag' - - // -------------------------------------------------- - // Extract variable names - // -------------------------------------------------- - varnames = stripe[.,2] - - // Keep non-baseline coefficients - varnames_no_bl = select(varnames, nonzero_b_flag :== 1) - - // -------------------------------------------------- - // Clean labels - // -------------------------------------------------- - labels_no_bl = usubinstr(varnames_no_bl, "1.", "", 1) - labels_no_bl = regexr(labels_no_bl, "^_cons", "Constant") - - // Handle lags: L.var -> var_L1 - labels_no_bl = /// - regexm(labels_no_bl, "^L\.") :* /// - (regexr(labels_no_bl, "^L\.", "") :+ "_L1") :+ /// - (!regexm(labels_no_bl, "^L\.") :* labels_no_bl) - - // Handle 1L.var - labels_no_bl = /// - regexm(labels_no_bl, "^1L\.") :* /// - (regexr(labels_no_bl, "^1L\.", "") :+ "_L1") :+ /// - (!regexm(labels_no_bl, "^1L\.") :* labels_no_bl) - - // 
-------------------------------------------------- - // Add header - // -------------------------------------------------- - labels_out = "v1" \ labels_no_bl - - // -------------------------------------------------- - // Write to temp file - // -------------------------------------------------- - outfile = st_local("dir_results") + "/temp_labels.txt" - fh = fopen(outfile, "w") - for (i=1; i<=rows(labels_out); i++) { - fput(fh, labels_out[i]) - } - fclose(fh) -end - - - * Import cleaned labels into Stata - import delimited "$dir_results/temp_labels.txt", clear varnames(1) encoding(utf8) - - gen n = _n - - * Export labels to Excel - putexcel set "$dir_results/reg_education", sheet("E1b") modify - - * Vertical labels - summarize n, meanonly - local N = r(max)+1 - forvalue i = 2/`N' { - - local j = `i' - 1 - putexcel A`i' = v1[`j'] - - } - - * Horizontal labels - summarize n, meanonly - local N = r(max) + 1 // Adjusted since we're working across columns - - forvalues j = 1/`N' { - - local n = `j'+2 // Shift by 2 to start from column C - local col "" - - while `n' > 0 { - local rem = mod(`n' - 1, 26) - local col = char(65 + `rem') + "`col'" - local n = floor((`n' - 1)/26) - } - - putexcel `col'1 = v1[`j'] - } - - - * Clean up - cap erase "$dir_results/temp_labels.txt" - -restore - -* Export model fit statistics -putexcel set "$dir_results/reg_education", sheet("Gof") modify - -putexcel A8 = "E1b - Returning to education", bold - -putexcel A10 = "Pseudo R-squared" -putexcel B10 = r2_p -putexcel A11 = "N" -putexcel B11 = N_sample -putexcel E10 = "Chi^2" -putexcel F10 = chi2 -putexcel E11 = "Log likelihood" -putexcel F11 = ll - -* Clean up -drop in_sample p -scalar drop _all -matrix drop _all + li.Deh_c4_High li.Deh_c4_Low li.Dehmf_c3_Medium li.Dehmf_c3_Low /// + li.Les_c3_NotEmployed li.Les_c3_Employed l.Dnc l.Dnc02 /// + $regions Year_transformed Y2020 Y2021 $ethnicity /// + if ${e1b_if_condition} [pw=${weight}], vce(robust) +process_regression, domain("education") 
process("E1b") sheet("E1b") /// + title("Process E1b: Prob. return to education") /// + gofrow(7) goflabel("E1b - Return to education") /// + ifcond("${e1b_if_condition}") probit /****************** E2: EDUCATION ATTAINMENT WHEN LEAVE SCHOOL ****************/ display "${e2_if_condition}" gologit2 deh_c3_recoded i.Dgn Dag Dag_sq /// - i.L_Dehmf_c3_Medium i.L_Dehmf_c3_Low /// - $regions Year_transformed Y2020 Y2021 $ethnicity /// - if ${e2_if_condition} [pw=dwt], autofit - -* Save raw results -matrix results = r(table) -matrix results = results[1..6,1...]' - -putexcel set "$dir_raw_results/education/education", sheet("Process E2") /// - modify -putexcel A3 = matrix(results), names nformat(number_d2) -putexcel J4 = matrix(e(V)) - - -outreg2 stats(coef se pval) using "$dir_raw_results/education/E2.doc", /// - replace /// -title("Process E2: Educational Attainment When Leave School") /// - ctitle(Education level) label side dec(2) noparen /// - addstat(R2, e(r2_p), Chi2, e(chi2), Log-likelihood, e(ll)) /// - addnote(`"Note: Regression if condition = (${e2_if_condition})"') - - -* Save sample inclusion indicator and predicted probabilities -gen in_sample = e(sample) -predict p1 p2 p3 - -* Save sample for estimates validation -save "$dir_validation_data/E2_sample", replace - -* Store model summary statistics -scalar r2_p = e(r2_p) -scalar N_sample = e(N) - -* Store results in Excel - -* Store estimates in matrices -matrix b = e(b) -matrix V = e(V) - -* Raw output -putexcel set "$dir_results/reg_education", sheet("E2_raw") modify -putexcel A1 = matrix(b'), names nformat(number_d2) -putexcel A1 = "CATEGORY" -putexcel B1 = "REGRESSOR" -putexcel C1 = "COEFFICIENT" - -* Estimated coefficients -scalar no_coefs_all = colsof(b) - -* Eliminate rows and columns containing zeros (baseline cats) -mata: - // Call matrices into mata - b = st_matrix("b") - - // Find which coefficients are nonzero - keep = (b :!= 0) - - // Eliminate zeros - nonzero_b = select(b, keep) - - nonzero_b - - 
// Return to Stata - st_matrix("nonzero_b", nonzero_b) - st_matrix("nonzero_b_flag", keep) -end - -* Inspect -matrix list b -matrix list nonzero_b -matrix list nonzero_b_flag - -* Save dimensions -scalar no_nonzero_b = colsof(nonzero_b) -scalar no_nonzero_b_per = no_nonzero_b / 2 - -* Address repetition of proportional odds covariates - -* Generate repetition/unique observation flag -mata: - // Import matrices into mata - nonzero_b_mata = st_matrix("nonzero_b") - - // Generate binary vector =1 if coefficient repeated - n = cols(nonzero_b_mata) - repetition_flag = J(n, 1, 0) - - // use tolerance based comparison to avoid precision errors - tol = 1e-8 - - for (i = 1; i <= n; i++) { - for (j = 1; j <= n; j++) { - if (i != j && abs(nonzero_b_mata[i] - nonzero_b_mata[j]) < tol) { - repetition_flag[i] = 1 - break - } - } - } - repetition_flag - - // Generate binary vector =1 if coefficient not repeated - unique_flag = 1 :- repetition_flag - - // Return to Stata - st_matrix("repetition_flag", repetition_flag') - st_matrix("unique_flag", unique_flag') - -end - -* Generate vector to multiply the coef vector with to eliminate the repetitions -* of coefficients for vars that satify the proportional odds assumptions -matrix structure_a = J(1,no_nonzero_b_per,1) -matrix structure_b = unique_flag[1,no_nonzero_b_per+1..no_nonzero_b] -matrix structure = structure_a, structure_b - -* Inspect -matrix list structure_a -matrix list structure_b -matrix list structure -matrix list nonzero_b - -* Eliminate repetitions -mata: - // Call matrices into mata - var = st_matrix("var") - structure = st_matrix("structure") - nonzero_b = st_matrix("nonzero_b") - - // Convert reptitions into zeros - b_structure = structure :* nonzero_b - - b_structure - - // Eliminate zeros - keep = (b_structure :!= 0) - - nonzero_b_structure = select(b_structure, keep) - - // Export to Stata - st_matrix("b_structure", b_structure) - st_matrix("nonzero_b_structure", nonzero_b_structure) - -end - -matrix list 
nonzero_b_structure - -* Export into Excel -putexcel set "$dir_results/reg_education", sheet("E2") modify -putexcel A1 = matrix(nonzero_b_structure'), names //nformat(number_d2) - - -* Variance-covariance matrix -* Eliminate zeros (baseline categories) -mata: - V = st_matrix("V") - b = st_matrix("b") - - // Find which coefficients are nonzero - keep = (b :!= 0) - - // Eliminate zeros - V_trimmed = select(V, keep) - V_trimmed = select(V_trimmed', keep)' - - V_trimmed - - // Return to Stata - st_matrix("var", V_trimmed) -end - -matrix list var - - -* Address repetition due to proportional odds being satisfied for some covars -matrix square_structure_a = J(no_nonzero_b,1,1) * structure -matrix square_structure_b = square_structure_a' - -matrix list square_structure_a -matrix list square_structure_b -mata: - // Call matrices into mata - var = st_matrix("var") - - // Create structure matrix (0 = eliminate) - square_structure_a = st_matrix("square_structure_a") - square_structure_b = st_matrix("square_structure_b") - - // Element-by-element multiplication - square_structure = square_structure_a :* square_structure_b - var_structure = square_structure :* var - - // Eliminate zeros - row_keep = rowsum(abs(var_structure)) :!= 0 - col_keep = colsum(abs(var_structure)) :!= 0 - - nonzero_var_structure = select(select(var_structure, row_keep), col_keep) - - // Return to Stata - st_matrix("nonzero_var_structure", nonzero_var_structure) -end - -matrix list nonzero_var_structure - -* Export to Excel -putexcel set "$dir_results/reg_education", sheet("E2") modify -putexcel C2 = matrix(nonzero_var_structure) - -*======================================================================= -* Eigenvalue stability check for trimmed variance-covariance matrix - -matrix symeigen X lambda = nonzero_var_structure - -* Largest eigenvalue -scalar max_eig = lambda[1,1] - -* Ratio of smallest to largest eigenvalue -scalar min_ratio = lambda[1, colsof(lambda)] / max_eig - -* Check 1: near-singularity 
-if max_eig < 1.0e-12 { - display as error "CRITICAL ERROR: Variance-covariance matrix is near singular." - display as error "Max eigenvalue = " max_eig - exit 999 -} - -* Check 2: ill-conditioning -if min_ratio < 1.0e-12 { - display as error "Matrix is ill-conditioned." - display as error "Min/Max eigenvalue ratio = " min_ratio - exit 506 -} - -display "VCV stability check passed." -display "Max eigenvalue: " max_eig -display "Min/Max ratio: " min_ratio -*======================================================================= - -* Labels -preserve - -putexcel set "$dir_results/reg_education", sheet("E2") modify - -putexcel A1 = "REGRESSOR" -putexcel B1 = "COEFFICIENT" - - - * Use Mata to extract nice labels from colstripe of e(b) (replacement for Stata 14) -local dir_results "$dir_results" -cap erase "$dir_results/temp_labels.txt" - -* Run Mata block -mata: - // Import matrices from Stata - nonzero_b_flag = st_matrix("nonzero_b_flag")' - unique_flag = st_matrix("unique_flag")' - structure = st_matrix("structure")' - stripe = st_matrixcolstripe("e(b)") - - // Extract variable and category names - catnames = stripe[.,1] - varnames = stripe[.,2] - varnames_no_bl = select(varnames, nonzero_b_flag :== 1) - catnames_no_bl = select(catnames, nonzero_b_flag :== 1) - - // Handle lags - labels_no_bl = regexm(varnames_no_bl, "^L_") :* (regexr(varnames_no_bl, "^L_", "") :+ "_L1") :+ (!regexm(varnames_no_bl, "^L_") :* varnames_no_bl) - - // Add category name when flag is not unique - labels_no_bl = labels_no_bl :+ "_" :+ (catnames_no_bl :* (unique_flag[1::rows(labels_no_bl)] :!= 0)) - - // Clean labels - labels_no_bl = usubinstr(labels_no_bl, "1.", "", 1) - labels_no_bl = regexr(labels_no_bl, "^_cons", "Constant") - - // Filter for structure == 1 - nonzero_labels_structure = select(labels_no_bl, structure[1::rows(labels_no_bl)] :== 1) - - // Add header row - nonzero_labels_structure = "v1"\nonzero_labels_structure - - // Write to temporary file - fh = 
fopen(st_local("dir_results") + "/temp_labels.txt", "w") - for (i=1; i<=rows(nonzero_labels_structure); i++) { - fput(fh, nonzero_labels_structure[i]) - } - fclose(fh) -end - - * Import cleaned labels into Stata as new dataset - import delimited "$dir_results/temp_labels.txt", clear varnames(1) encoding(utf8) - gen n = _n - - * Export labels to Excel - putexcel set "$dir_results/reg_education", sheet("E2") modify - - * Vertical labels - sum n, meanonly - local N = r(max)+1 - - forvalue i = 2/`N' { - local j = `i' - 1 - putexcel A`i' = v1[`j'] - } - - * Horizontal labels - sum n, meanonly - local N = r(max) + 1 // Adjusted since we're working across columns - - forvalues j = 1/`N' { - local n = `j'+2 // Shift by 2 to start from column C - local col "" - - while `n' > 0 { - local rem = mod(`n' - 1, 26) - local col = char(65 + `rem') + "`col'" - local n = floor((`n' - 1)/26) - } - - putexcel `col'1 = v1[`j'] - } - - *Clean up - cap erase "$dir_results/temp_labels.txt" - -restore - -* Goodness of fit - -putexcel set "$dir_results/reg_education", sheet("Gof") modify - -putexcel A13 = "E2 - Education attainment", bold - -putexcel A15 = "Pseudo R-squared" -putexcel B15 = r2_p -putexcel A16 = "N" -putexcel B16 = N_sample - - -* Clean up -drop in_sample p1 p2 p3 -scalar drop _all -matrix drop _all - + i.L_Dehmf_c3_Medium i.L_Dehmf_c3_Low /// + $regions Year_transformed Y2020 Y2021 $ethnicity /// + if ${e2_if_condition} [pw=${weight}], autofit + +process_gologit, domain("education") process("E2") sheet("E2") /// + title("Process E2: Educational Attainment When Leave School") /// + gofrow(11) goflabel("E2 - Education attainment") /// + outcomes(3) /// + ifcond("${e2_if_condition}") + + +display "Education analysis complete!" 
+ capture log close diff --git a/input/InitialPopulations/compile/RegressionEstimates/reg_fertility.do b/input/InitialPopulations/compile/RegressionEstimates/reg_fertility.do index f26a980c1..f34cc25ca 100644 --- a/input/InitialPopulations/compile/RegressionEstimates/reg_fertility.do +++ b/input/InitialPopulations/compile/RegressionEstimates/reg_fertility.do @@ -16,25 +16,25 @@ set type double //set maxvar 120000 set maxvar 30000 -******************************************************************* + +/********************************* SET LOG FILE *******************************/ cap log close log using "${dir_log}/reg_fertility.log", replace -******************************************************************* -* Set Excel file -* Info sheet +/******************************* SET EXCEL FILE *******************************/ putexcel set "$dir_results/reg_fertility", sheet("Info") replace -putexcel A1 = "Description:" +putexcel A1 = "Description:", bold putexcel B1 = "Model parameters governing projection of fertility" -putexcel A2 = "Authors: Patryk Bronka, Justin van de Ven, Daria Popova, Aleksandra Kolndrekaj" +putexcel A2 = "Authors:" +putexcel B2 = "Patryk Bronka, Justin van de Ven, Daria Popova, Aleksandra Kolndrekaj" putexcel A3 = "Last edit: 18 Feb 2026 AK" -putexcel A4 = "Process:", bold -putexcel B4 = "Description:", bold -putexcel A5 = "F1" -putexcel B5 = "Probit regression estimates of the probability of having a child for women aged 18-44" +putexcel A5 = "Process:", bold +putexcel B5 = "Description:", bold +putexcel A6 = "F1" +putexcel B6 = "Prob have a child for women" putexcel A10 = "Notes:", bold putexcel B10 = "Estimation sample: UK_ipop.dta with grossing up weight dwt" @@ -56,14 +56,16 @@ sort idperson swv * Adjust variables do "${dir_do}/variable_update.do" - -*-------------------------------------------------- + * Any-children dummy (dchpd collapsing) -*-------------------------------------------------- replace dchpd = 1 if inlist(dchpd, 2, 3, 4, 
5) fre dchpd -/********************************** ESTIMATION ********************************/ + +/********************************* ESTIMATION *********************************/ + +* Run Stata programs to produce Excel file +do "${dir_do}/programs.do" /*********************** F1: PROBABILITY OF HAVING A CHILD ********************/ display "${f1_if_condition}" @@ -80,227 +82,15 @@ probit dchpd /// FertilityRate /// /*li.Les_c3_Student*/ li.Les_c3_NotEmployed /// $regions Year_transformed Y2020 Y2021 $ethnicity /// - if ${f1_if_condition} [pw=dwt], vce(robust) + if ${f1_if_condition} [pw=${weight}], vce(robust) +process_regression, domain("fertility") process("F1") sheet("F1") /// + title("Process F1: Prob. have a child") /// + gofrow(3) goflabel("F1 - Have child") /// + ifcond("${f1_if_condition}") probit -* Save raw results -matrix results = r(table) -matrix results = results[1..6,1...]' - -putexcel set "$dir_raw_results/fertility/fertility", /// - sheet("Process F1") replace -putexcel A3 = matrix(results), names nformat(number_d2) -putexcel J4 = matrix(e(V)) - -outreg2 stats(coef se pval) using "$dir_raw_results/fertility/F1.doc", replace /// - title("Process F1: Probability of Having a Child") /// - ctitle(Having a Child) label side dec(2) noparen /// - addstat(R2, e(r2_p), Chi2, e(chi2), Log-likelihood, e(ll)) /// - addnote(`"Note: Regression if condition = (${f1_if_condition})"') - - -* Save sample inclusion indicator and predicted probabilities -gen in_sample = e(sample) -predict p - -* Save sample for estimate validation -save "$dir_validation_data/F1_sample", replace - -* Store model summary statistics -scalar r2_p = e(r2_p) -scalar N_sample = e(N) -scalar chi2 = e(chi2) -scalar ll = e(ll) - -* Store results in Excel - -* Store estimates -matrix b = e(b) -matrix V = e(V) - -mata: - // Call matrices into mata - V = st_matrix("V") - b = st_matrix("b") - - // Find which coefficients are nonzero - keep = (b :!= 0) - - // Eliminate zeros - b_trimmed = select(b, 
keep) - V_trimmed = select(V, keep) - V_trimmed = select(V_trimmed', keep)' - - // Inspection - b_trimmed - V_trimmed - - // Return to Stata - st_matrix("b_trimmed", b_trimmed') - st_matrix("V_trimmed", V_trimmed) - st_matrix("nonzero_b_flag", keep) -end - -* Eigenvalue tests for var-cov invertablility in SimPaths -matrix symeigen X lambda = V_trimmed - -scalar max_eig = lambda[1,1] - -scalar min_ratio = lambda[1, colsof(lambda)] / max_eig - -* Outcome of max eigenvalue test -if max_eig < 1.0e-12 { - - display as error "CRITICAL ERROR: Maximum eigenvalue is too small (`max_eig')." - display as error "The Variance-Covariance matrix is likely singular." - exit 999 - -} - -display "Stability Check Passed: Max Eigenvalue is " max_eig - -* Outcome of eigenvalue ratio test -if min_ratio < 1.0e-12 { - - display as error "Matrix is ill-conditioned. Min/Max ratio: " min_ratio - exit 506 - -} - -display "Stability Check Passed. Min/Max ratio: " min_ratio - -* Export into Excel -putexcel set "$dir_results/reg_fertility", sheet("F1") modify -putexcel B2 = matrix(b_trimmed) -putexcel C2 = matrix(V_trimmed) - -* Labels -preserve -putexcel set "$dir_results/reg_fertility", sheet("F1") modify - -putexcel A1 = "REGRESSOR" -putexcel B1 = "COEFFICIENT" - -* Use Mata to extract nice labels from colstripe of e(b) - -local dir_results "$dir_results" -cap erase "$dir_results/temp_labels.txt" - -mata: - // -------------------------------------------------- - // Import objects from Stata - // -------------------------------------------------- - nonzero_b_flag = st_matrix("nonzero_b_flag") - stripe = st_matrixcolstripe("e(b)") - - // Ensure column vector - nonzero_b_flag = nonzero_b_flag' - - // -------------------------------------------------- - // Extract variable names - // -------------------------------------------------- - varnames = stripe[.,2] - - // Keep non-baseline coefficients - varnames_no_bl = select(varnames, nonzero_b_flag :== 1) - - // 
-------------------------------------------------- - // Clean labels - // -------------------------------------------------- - labels_no_bl = usubinstr(varnames_no_bl, "1.", "", 1) - labels_no_bl = regexr(labels_no_bl, "^_cons", "Constant") - - // Handle lags: L.var -> var_L1 - labels_no_bl = /// - regexm(labels_no_bl, "^L\.") :* /// - (regexr(labels_no_bl, "^L\.", "") :+ "_L1") :+ /// - (!regexm(labels_no_bl, "^L\.") :* labels_no_bl) - - // Handle 1L.var - labels_no_bl = /// - regexm(labels_no_bl, "^1L\.") :* /// - (regexr(labels_no_bl, "^1L\.", "") :+ "_L1") :+ /// - (!regexm(labels_no_bl, "^1L\.") :* labels_no_bl) - - // -------------------------------------------------- - // Add header - // -------------------------------------------------- - labels_out = "v1" \ labels_no_bl - - // -------------------------------------------------- - // Write to temp file - // -------------------------------------------------- - outfile = st_local("dir_results") + "/temp_labels.txt" - fh = fopen(outfile, "w") - for (i=1; i<=rows(labels_out); i++) { - fput(fh, labels_out[i]) - } - fclose(fh) -end - - - * Import cleaned labels into Stata - import delimited "$dir_results/temp_labels.txt", clear varnames(1) /// - encoding(utf8) - gen n = _n - - * Export labels to Excel - putexcel set "$dir_results/reg_fertility", sheet("F1") modify - - * Vertical labels - summarize n, meanonly - local N = r(max)+1 - forvalue i = 2/`N' { - - local j = `i' - 1 - putexcel A`i' = v1[`j'] - - } - - * Horizontal labels - summarize n, meanonly - local N = r(max) + 1 // Adjusted since we're working across columns - - forvalues j = 1/`N' { - - local n = `j'+2 // Shift by 2 to start from column C - local col "" - - while `n' > 0 { - local rem = mod(`n' - 1, 26) - local col = char(65 + `rem') + "`col'" - local n = floor((`n' - 1)/26) - } - - putexcel `col'1 = v1[`j'] - } - - * Clean up - cap erase "$dir_results/temp_labels.txt" - -restore - - -* Export model fit statistics -putexcel set 
"$dir_results/reg_fertility", sheet("Gof") modify - -putexcel A3 = "U1- Partnership formation", bold - -putexcel A5 = "Pseudo R-squared" -putexcel B5 = r2_p -putexcel A6 = "N" -putexcel B6 = N_sample -putexcel E5 = "Chi^2" -putexcel F5 = chi2 -putexcel E6 = "Log likelihood" -putexcel F6 = ll - -* Clean up -drop in_sample p -scalar drop _all -matrix drop _all - +display "Fertility analysis complete!" + capture log close - diff --git a/input/InitialPopulations/compile/RegressionEstimates/reg_financial_distress.do b/input/InitialPopulations/compile/RegressionEstimates/reg_financial_distress.do index e17bc4469..79292df3f 100644 --- a/input/InitialPopulations/compile/RegressionEstimates/reg_financial_distress.do +++ b/input/InitialPopulations/compile/RegressionEstimates/reg_financial_distress.do @@ -13,222 +13,82 @@ set more off set mem 200m set maxvar 30000 - -******************************************************************* +/********************************* SET LOG FILE *******************************/ cap log close log using "${dir_log}/reg_financial_distress.log", replace -******************************************************************* -/********************************* PREPARE DATA *******************************/ +****************************** SAMPLE GLOBALS ********************************** +* For master file -use ${estimation_sample}, clear +global HM1_L_if_condition "" -* Set data -xtset idperson swv -sort idperson swv -* Adjust variables -do "${dir_do}/variable_update.do" -/* DP: Household income/poverty/employment transition variables are moved to variable_update.do */ +/******************************* SET EXCEL FILE *******************************/ -* Remove children -drop if dag < 16 +putexcel set "$dir_results/reg_financial_distress", sheet("Info") replace +putexcel A1 = "Description:", bold +putexcel B1 = "Model parameters governing projection of financial distress" +putexcel A2 = "Authors:" +putexcel B2 = "Andy Baxter, Erik Igelström" 
+putexcel A3 = "Last edit: 17 Feb 2026" -********************************************************************** -* HM1_L: GHQ12 score 0-36 of all working-age adults - baseline effects * -********************************************************************** +putexcel A5 = "Process:", bold +putexcel B5 = "Description:", bold -logit financial_distress /// -ib11.exp_emp i.lhw_c5 D.log_income i.exp_incchange ib0.exp_poverty L.ypncp L.ypnoab /// -L.i.econ_benefits L.i.dhh_owned L.i.dcpst L.dnc L.dhe_pcs L.dhe_mcs L.ib8.drgn L.i.ydses_c5 L.dlltsd01 L.financial_distress /// -i.dgn L.dag L.dagsq i.deh_c3 i.dot stm /// -[pweight=${weight}] /// -, vce(cluster idperson) - * save raw results -matrix results = r(table) -matrix results = results[1..6,1...]' -putexcel set "$dir_raw_results/financial_distress/financial_distress", sheet("UK") replace -putexcel A3 = matrix(results), names nformat(number_d2) -putexcel J4 = matrix(e(V)) +putexcel A10 = "Notes:", bold -gen in_sample = e(sample) -predict p +putexcel set "$dir_results/reg_financial_distress", sheet("Gof") modify +putexcel A1 = "Goodness of fit", bold -save "$dir_validation_data/financial_distress", replace +/********************************* PREPARE DATA *******************************/ -scalar r2_p = e(r2_p) -scalar N = e(N) -scalar rmse = e(rmse) -scalar chi2 = e(chi2) -scalar ll = e(ll) +use "${estimation_sample}", clear +* Set data +xtset idperson swv +sort idperson swv -* Results +* Adjust variables +do "${dir_do}/variable_update.do" +/* DP: Household income/poverty/employment transition variables are moved to variable_update.do */ + +* Remove children +drop if dag < 16 -* Note: Zeros values are eliminated - -matrix b = e(b) -matrix V = e(V) +/********************************** ESTIMATION ********************************/ -* Store variance-covariance matrix +* Run Stata programs to produce Excel file +do "${dir_do}/programs.do" -preserve -putexcel set "$dir_raw_results/financial_distress/var_cov", sheet("var_cov") 
replace -putexcel A1 = matrix(V) +************************************************************************ +* HM1_L: Probability of being in financial distress - baseline effects * +************************************************************************ -import excel "$dir_raw_results/financial_distress/var_cov", sheet("var_cov") clear +logit financial_distress /// + ib11.exp_emp i.lhw_c5 D.log_income i.exp_incchange /// + ib0.exp_poverty L.ypncp L.ypnoab /// + L.i.econ_benefits L.i.dhh_owned L.i.dcpst L.dnc L.dhe_pcs L.dhe_mcs /// + L.ib8.drgn L.i.ydses_c5 L.dlltsd01 L.financial_distress /// + i.dgn L.dag L.dagsq i.deh_c3 i.dot stm /// + [pw=${weight}] /// + , vce(cluster idperson) -describe -local no_vars = `r(k)' -forvalues i = 1/2 { - egen row_sum = rowtotal(*) - drop if row_sum == 0 - drop row_sum - xpose, clear -} +process_regression, domain("financial_distress") process("HM1_L") /// + sheet("HM1_L") /// + title("Process HM1_L: Financial distress") /// + gofrow(3) goflabel("HM1_L - Financial distress") /// + ifcond("${HM1_L_if_condition}") probit -mkmat v*, matrix(var) -putexcel set "$dir_results/reg_financial_distress", sheet("UK") modify -putexcel C2 = matrix(var) - -restore - - -* Store estimated coefficients - -// Initialize a counter for non-zero coefficients -local non_zero_count = 0 -//local names : colnames b - -// Loop through each element in `b` to count non-zero coefficients -forvalues i = 1/`no_vars' { - if (b[1, `i'] != 0) { - local non_zero_count = `non_zero_count' + 1 - } -} - -// Create a new row vector to hold only non-zero coefficients -matrix nonzero_b = J(1, `non_zero_count', .) 
- -// Populate nonzero_b with non-zero coefficients from b -local index = 1 -forvalues i = 1/`no_vars' { - if (b[1, `i'] != 0) { - matrix nonzero_b[1, `index'] = b[1, `i'] - local index = `index' + 1 - } -} - -putexcel set "$dir_results/reg_financial_distress", sheet("UK") modify -putexcel A1 = matrix(nonzero_b'), names nformat(number_d2) - - -* Labelling - -putexcel A1 = "REGRESSOR" -putexcel A2 = "EmployedToUnemployed" // 13.exp_emp -putexcel A3 = "UnemployedToEmployed" // 31.exp_emp -putexcel A4 = "PersistentUnemployed" // 33.exp_emp -putexcel A5 = "Lhw_10" // 10.lhw_c5 -putexcel A6 = "Lhw_20" // 20.lhw_c5 -putexcel A7 = "Lhw_30" // 30.lhw_c5 -putexcel A8 = "Lhw_40" // 40.lhw_c5 -putexcel A9 = "RealIncomeChange" // D.log_income -putexcel A10 = "RealIncomeDecrease_D" // 1.exp_incchange -putexcel A11 = "NonPovertyToPoverty" // 1.exp_poverty -putexcel A12 = "PovertyToNonPoverty" // 2.exp_poverty -putexcel A13 = "PersistentPoverty" // 3.exp_poverty -putexcel A14 = "Ypncp_L1" // L.ypncp -putexcel A15 = "Ypnoab_L1" // L.ypnoab -putexcel A16 = "D_Econ_benefits" // 1L.econ_benefits -putexcel A17 = "D_Home_owner_L1" // 1L.dhh_owned -putexcel A18 = "Dcpst_Single_L1" // 2L.dcpst -putexcel A19 = "Dnc_L1" // L.dnc -putexcel A20 = "Dhe_pcs_L1" // L.dhe_pcs -putexcel A21 = "Dhe_mcs_L1" // L.dhe_mcs -putexcel A22 = "UKC" // 1L.drgn1 -putexcel A23 = "UKD" // 2L.drgn1 -putexcel A24 = "UKE" // 4L.drgn1 -putexcel A25 = "UKF" // 5L.drgn1 -putexcel A26 = "UKG" // 6L.drgn1 -putexcel A27 = "UKH" // 7L.drgn1 -putexcel A28 = "UKJ" // 9L.drgn1 -putexcel A29 = "UKK" // 10L.drgn1 -putexcel A30 = "UKL" // 11L.drgn1 -putexcel A31 = "UKM" // 12L.drgn1 -putexcel A32 = "UKN" // 13L.drgn1 -putexcel A33 = "Ydses_c5_Q2_L1" // 2L.ydses_c5 -putexcel A34 = "Ydses_c5_Q3_L1" // 3L.ydses_c5 -putexcel A35 = "Ydses_c5_Q4_L1" // 4L.ydses_c5 -putexcel A36 = "Ydses_c5_Q5_L1" // 5L.ydses_c5 -putexcel A37 = "Dlltsd01_L1" // L.dlltsd01 -putexcel A38 = "FinancialDistress" // L.financial_distress -putexcel A39 = 
"Dgn" // 1.dgn -putexcel A40 = "Dag_L1" // L.dag -putexcel A41 = "Dag_sq_L1" // L.dagsq -putexcel A42 = "Deh_c3_Medium" // 2.deh_c3 -putexcel A43 = "Deh_c3_Low" // 3.deh_c3 -putexcel A44 = "EthnicityAsian" // 2.dot -putexcel A45 = "EthnicityBlack" // 3.dot -putexcel A46 = "EthnicityOther" // 4.dot -putexcel A47 = "Year_transformed" // stm -putexcel A48 = "Constant" // _cons - -putexcel B1 = "COEFFICIENT" -putexcel C1 = "EmployedToUnemployed" // 13.exp_emp -putexcel D1 = "UnemployedToEmployed" // 31.exp_emp -putexcel E1 = "PersistentUnemployed" // 33.exp_emp -putexcel F1 = "Lhw_10" // 10.lhw_c5 -putexcel G1 = "Lhw_20" // 20.lhw_c5 -putexcel H1 = "Lhw_30" // 30.lhw_c5 -putexcel I1 = "Lhw_40" // 40.lhw_c5 -putexcel J1 = "RealIncomeChange" // D.log_income -putexcel K1 = "RealIncomeDecrease_D" // 1.exp_incchange -putexcel L1 = "NonPovertyToPoverty" // 1.exp_poverty -putexcel M1 = "PovertyToNonPoverty" // 2.exp_poverty -putexcel N1 = "PersistentPoverty" // 3.exp_poverty -putexcel O1 = "Ypncp_L1" // L.ypncp -putexcel P1 = "Ypnoab_L1" // L.ypnoab -putexcel Q1 = "D_Econ_benefits" // 1L.econ_benefits -putexcel R1 = "D_Home_owner_L1" // 1L.dhh_owned -putexcel S1 = "Dcpst_Single_L1" // 2L.dcpst -putexcel T1 = "Dnc_L1" // L.dnc -putexcel U1 = "Dhe_pcs_L1" // L.dhe_pcs -putexcel V1 = "Dhe_mcs_L1" // L.dhe_mcs -putexcel W1 = "UKC" // 1L.drgn1 -putexcel X1 = "UKD" // 2L.drgn1 -putexcel Y1 = "UKE" // 4L.drgn1 -putexcel Z1 = "UKF" // 5L.drgn1 -putexcel AA1 = "UKG" // 6L.drgn1 -putexcel AB1 = "UKH" // 7L.drgn1 -putexcel AC1 = "UKJ" // 9L.drgn1 -putexcel AD1 = "UKK" // 10L.drgn1 -putexcel AE1 = "UKL" // 11L.drgn1 -putexcel AF1 = "UKM" // 12L.drgn1 -putexcel AG1 = "UKN" // 13L.drgn1 -putexcel AH1 = "Ydses_c5_Q2_L1" // 2L.ydses_c5 -putexcel AI1 = "Ydses_c5_Q3_L1" // 3L.ydses_c5 -putexcel AJ1 = "Ydses_c5_Q4_L1" // 4L.ydses_c5 -putexcel AK1 = "Ydses_c5_Q5_L1" // 5L.ydses_c5 -putexcel AL1 = "Dlltsd01_L1" // L.dlltsd01 -putexcel AM1 = "FinancialDistress" // L.financial_distress -putexcel 
AN1 = "Dgn" // 1.dgn -putexcel AO1 = "Dag_L1" // L.dag -putexcel AP1 = "Dag_sq_L1" // L.dagsq -putexcel AQ1 = "Deh_c3_Medium" // 2.deh_c3 -putexcel AR1 = "Deh_c3_Low" // 3.deh_c3 -putexcel AS1 = "EthnicityAsian" // 2.dot -putexcel AT1 = "EthnicityBlack" // 3.dot -putexcel AU1 = "EthnicityOther" // 4.dot -putexcel AV1 = "Year_transformed" // stm -putexcel AW1 = "Constant" // _cons - -drop in_sample p -scalar drop r2_p N chi2 ll + +display "Financial distress analysis complete!" + + +capture log close + \ No newline at end of file diff --git a/input/InitialPopulations/compile/RegressionEstimates/reg_health.do b/input/InitialPopulations/compile/RegressionEstimates/reg_health.do index 5195f1269..b84d592e2 100644 --- a/input/InitialPopulations/compile/RegressionEstimates/reg_health.do +++ b/input/InitialPopulations/compile/RegressionEstimates/reg_health.do @@ -2,8 +2,9 @@ * PROJECT: SimPaths UK * SECTION: Health * OBJECT: Health status and Disability -* AUTHORS: Daria Popova, Justin van de Ven, Aleksandra Kolndrekaj -* LAST UPDATE: 18 Feb 2026 AK +* AUTHORS: Daria Popova, Justin van de Ven, Aleksandra Kolndrekaj, +* Ashley Burdett +* LAST UPDATE: 26 Mar 2026 (AB) * COUNTRY: UK * * NOTES: Combined former a and b processes. 
@@ -16,34 +17,33 @@ set type double //set maxvar 120000 set maxvar 30000 -******************************************************************* +/********************************* SET LOG FILE *******************************/ cap log close log using "${dir_log}/reg_health.log", replace -******************************************************************* -* Set Excel file -* Info sheet +/******************************* SET EXCEL FILE *******************************/ putexcel set "$dir_results/reg_health", sheet("Info") replace -putexcel A1 = "Description:" +putexcel A1 = "Description:", bold putexcel B1 = "Model parameters governing projection self-reported health status" -putexcel A2 = "Authors: Patryk Bronka, Justin van de Ven, Daria Popova, Aleksandra Kolndrekaj" -putexcel A3 = "Last edit: 18 Feb 2026 AK" +putexcel A2 = "Authors:" +putexcel B2 = "Justin van de Ven, Daria Popova, Aleksandra Kolndrekaj, Ashley Burdett" +putexcel A3 = "Last edit: 26 Mar 2026 AB" -putexcel A4 = "Process:", bold -putexcel B4 = "Description:", bold +putexcel A5 = "Process:", bold +putexcel B5 = "Description:", bold -putexcel A5 = "H1" -putexcel B5 = "Generalized ordered logit regression estimates of self reported health status" -putexcel B6 = "Covariates that satisfy the parallel lines assumption have one estimate for all categories of the dependent variable and are present once in the table" -putexcel B7 = "Covariates that do not satisfy the parallel lines assumption have an estimate for each estimated category of the dependent variable. These covariates have the dependent variable category appended to their name." 
+putexcel A6 = "H1" +putexcel B6 = "Self rated health (5 cat)" +putexcel B7 = "Covariates that satisfy the parallel lines assumption have one estimate for all categories of the dependent variable and are present once in the table" +putexcel B8 = "Covariates that do not satisfy the parallel lines assumption have an estimate for each estimated category of the dependent variable. These covariates have the dependent variable category appended to their name." -putexcel A8 = "H1_raw" -putexcel B8 = "Raw generalized ordered logit regression estimates of self reported health status. Useful for the 'Gologit predictor' file." +putexcel A9 = "H1_raw" +putexcel B9 = "Self rated health (5 cat) - unformatted output" -putexcel A11 = "H2" -putexcel B11 = "Probit regression estimates of the probability of being long-term sick or disabled" +putexcel A10 = "H2" +putexcel B10 = "Prob. long-term sick or disabled" putexcel A15 = "Notes:", bold putexcel B15 = "Estimation sample: UK_ipop.dta with grossing up weight dwt" @@ -69,6 +69,10 @@ do "${dir_do}/variable_update.do" /********************************** ESTIMATION ********************************/ +* Run Stata programs to produce Excel file +do "${dir_do}/programs.do" + + /********************** H1: SELF-REPORTED HEALTH STATUS ***********************/ display "${h1_if_condition}" @@ -77,538 +81,43 @@ gologit2 dhe Ded Dgn Dag Dag_sq /// /*Ded_Dag Ded_Dag_sq Ded_Dgn /// */ i.Deh_c4_Medium i.Deh_c4_Low i.Deh_c4_Na /// /*L_Les_c4_Student*/ L_Les_c4_NotEmployed L_Les_c4_Retired /// L_Ydses_c5_Q2 L_Ydses_c5_Q3 L_Ydses_c5_Q4 L_Ydses_c5_Q5 /// - L_Dhhtp_c4_CoupleChildren L_Dhhtp_c4_SingleNoChildren L_Dhhtp_c4_SingleChildren L_Dlltsd01 /// + L_Dhhtp_c4_CoupleChildren L_Dhhtp_c4_SingleNoChildren /// + L_Dhhtp_c4_SingleChildren L_Dlltsd01 /// $regions Year_transformed Y2020 Y2021 $ethnicity if /// - ${h1_if_condition} [pw=dwt], autofit + ${h1_if_condition} [pw=${weight}], autofit -*Note: In gologit2, the coefficients show how covariates affect 
the log-odds of being above a certain category vs. at or below it. - -* Save raw results -matrix results = r(table) -matrix results = results[1..6,1...]' - -putexcel set "$dir_raw_results/health/health", /// - sheet("Process H1") replace -putexcel A3 = matrix(results), names nformat(number_d2) -putexcel J4 = matrix(e(V)) - -outreg2 stats(coef se pval) using /// - "$dir_raw_results/health/H1.doc", replace /// -title("Process H1: Self-Reported Health Status") /// - ctitle(Health) label side dec(2) noparen /// - addstat(R2, e(r2_p), Chi2, e(chi2), Log-likelihood, e(ll)) /// - addnote(`"Note: Regression if condition = (${h1_if_condition})"') - - -* Save sample inclusion indicator and predicted probabilities -gen in_sample = e(sample) -predict p1 p2 p3 p4 p5 - -* Save sample for estimate validation -save "$dir_validation_data/H1_sample", replace - -* Store model summary statistics -scalar r2_p = e(r2_p) -scalar N_sample = e(N) - - -* Store results in Excel - -* Store estimates in matrices -matrix b = e(b) -matrix V = e(V) - -* Raw output -putexcel set "$dir_results/reg_health", sheet("H1_raw") modify -putexcel A1 = matrix(b'), names nformat(number_d2) -putexcel A1 = "CATEGORY" -putexcel B1 = "REGRESSOR" -putexcel C1 = "COEFFICIENT" - -* Estimated coefficients -scalar no_coefs_all = colsof(b) - -* Eliminate rows and columns containing zeros (baseline cats) -mata: - // Call matrices into mata - b = st_matrix("b") - - // Find which coefficients are nonzero - keep = (b :!= 0) - - // Eliminate zeros - nonzero_b = select(b, keep) - - // Inspect - nonzero_b - - // Return to Stata - st_matrix("nonzero_b", nonzero_b) - st_matrix("nonzero_b_flag", keep) -end - -* Inspect -matrix list b -matrix list nonzero_b -matrix list nonzero_b_flag - -* Save dimensions -scalar no_nonzero_b = colsof(nonzero_b) -scalar no_nonzero_b_per = no_nonzero_b / 4 // number of categories-1 - -* Address repetition of proportional odds covariates - -* Generate repetition/unique observation flag -mata: - // 
Import matrices into mata - nonzero_b_mata = st_matrix("nonzero_b") - - // Generate binary vector =1 if coefficient repeated - n = cols(nonzero_b_mata) - repetition_flag = J(n, 1, 0) - - // use tolerance based comparison to avoid precision errors - tol = 1e-8 - - for (i = 1; i <= n; i++) { - for (j = 1; j <= n; j++) { - if (i != j && abs(nonzero_b_mata[i] - nonzero_b_mata[j]) < tol) { - repetition_flag[i] = 1 - break - } - } - } - repetition_flag - - // Generate binary vector =1 if coefficient not repeated - unique_flag = 1 :- repetition_flag - - // Return to Stata - st_matrix("repetition_flag", repetition_flag') - st_matrix("unique_flag", unique_flag') - -end - -* Generate vector to multiply the coef vector with to eliminate the -* repetitions of coefficients for vars that satify the proportional odds -* assumptions -matrix structure_a = J(1,no_nonzero_b_per,1) -matrix structure_b = unique_flag[1,no_nonzero_b_per+1..no_nonzero_b] -matrix structure = structure_a, structure_b - -* Inspect -matrix list structure_a -matrix list structure_b -matrix list structure -matrix list nonzero_b - -* Eliminate repetitions -mata: - // Call matrices into mata - var = st_matrix("var") - structure = st_matrix("structure") - nonzero_b = st_matrix("nonzero_b") - - // Convert reptitions into zeros - b_structure = structure :* nonzero_b - - b_structure - - // Eliminate zeros - keep = (b_structure :!= 0) - - nonzero_b_structure = select(b_structure, keep) - - // Export to Stata - st_matrix("b_structure", b_structure) - st_matrix("nonzero_b_structure", nonzero_b_structure) - -end - -matrix list nonzero_b_structure - -* Export into Excel -putexcel set "$dir_results/reg_health", sheet("H1") modify -putexcel A1 = matrix(nonzero_b_structure'), names nformat(number_d2) - - -* Variance-covariance matrix -* ELiminate zeros (baseline categories) -mata: - V = st_matrix("V") - b = st_matrix("b") - - // Find which coefficients are nonzero - keep = (b :!= 0) - - // Eliminate zeros - V_trimmed = 
select(V, keep) - V_trimmed = select(V_trimmed', keep)' - - V_trimmed - - // Return to Stata - st_matrix("var", V_trimmed) -end - -matrix list var - -* Address repetition due to proportional odds being satisfied for some covars -matrix square_structure_a = J(no_nonzero_b,1,1) * structure -matrix square_structure_b = square_structure_a' - -matrix list square_structure_a -matrix list square_structure_b -mata: - // Call matrices into mata - var = st_matrix("var") - - // Create structure matrix (0 = eliminate) - square_structure_a = st_matrix("square_structure_a") - square_structure_b = st_matrix("square_structure_b") - - // Element-by-element multiplication - square_structure = square_structure_a :* square_structure_b - var_structure = square_structure :* var - - // Eliminate zeros - row_keep = rowsum(abs(var_structure)) :!= 0 - col_keep = colsum(abs(var_structure)) :!= 0 - - nonzero_var_structure = select(select(var_structure, row_keep), col_keep) - - // Return to Stata - st_matrix("nonzero_var_structure", nonzero_var_structure) -end - -matrix list nonzero_var_structure - -* Export to Excel -putexcel set "$dir_results/reg_health", sheet("H1") modify -putexcel C2 = matrix(nonzero_var_structure) - -*======================================================================= -* Eigenvalue stability check for trimmed variance-covariance matrix - -matrix symeigen X lambda = nonzero_var_structure - -* Largest eigenvalue -scalar max_eig = lambda[1,1] - -* Ratio of smallest to largest eigenvalue -scalar min_ratio = lambda[1, colsof(lambda)] / max_eig - -* Check 1: near-singularity -if max_eig < 1.0e-12 { - display as error "CRITICAL ERROR: Variance-covariance matrix is near singular." - display as error "Max eigenvalue = " max_eig - exit 999 -} - -* Check 2: ill-conditioning -if min_ratio < 1.0e-12 { - display as error "Matrix is ill-conditioned." 
- display as error "Min/Max eigenvalue ratio = " min_ratio - exit 506 -} +/* +Note: In gologit2, the coefficients show how covariates affect the log-odds of +being above a certain category vs. at or below it. +*/ -display "VCV stability check passed." -display "Max eigenvalue: " max_eig -display "Min/Max ratio: " min_ratio -*======================================================================= - -* Labels -preserve -putexcel set "$dir_results/reg_health", sheet("H1") modify - -putexcel A1 = "REGRESSOR" -putexcel B1 = "COEFFICIENT" - - * Use Mata to extract nice labels from colstripe of e(b) (replacement for Stata 14) -local dir_results "$dir_results" -cap erase "$dir_results/temp_labels.txt" - -* Run Mata block -mata: - // Import matrices from Stata - nonzero_b_flag = st_matrix("nonzero_b_flag")' - unique_flag = st_matrix("unique_flag")' - structure = st_matrix("structure")' - stripe = st_matrixcolstripe("e(b)") - - // Extract variable and category names - catnames = stripe[.,1] - varnames = stripe[.,2] - varnames_no_bl = select(varnames, nonzero_b_flag :== 1) - catnames_no_bl = select(catnames, nonzero_b_flag :== 1) - - // Handle lags - labels_no_bl = regexm(varnames_no_bl, "^L_") :* (regexr(varnames_no_bl, "^L_", "") :+ "_L1") :+ (!regexm(varnames_no_bl, "^L_") :* varnames_no_bl) - - // Add category name when flag is not unique - labels_no_bl = labels_no_bl :+ "_" :+ (catnames_no_bl :* (unique_flag[1::rows(labels_no_bl)] :!= 0)) - - // Clean labels - labels_no_bl = usubinstr(labels_no_bl, "1.", "", 1) - labels_no_bl = regexr(labels_no_bl, "^_cons", "Constant") - - // Filter for structure == 1 - nonzero_labels_structure = select(labels_no_bl, structure[1::rows(labels_no_bl)] :== 1) - - // Add header row - nonzero_labels_structure = "v1"\nonzero_labels_structure - - // Write to temporary file - fh = fopen(st_local("dir_results") + "/temp_labels.txt", "w") - for (i=1; i<=rows(nonzero_labels_structure); i++) { - fput(fh, nonzero_labels_structure[i]) - } - 
fclose(fh) -end - - * Import cleaned labels into Stata as new dataset - import delimited "$dir_results/temp_labels.txt", clear varnames(1) encoding(utf8) - gen n = _n - - * Export labels to Excel - putexcel set "$dir_results/reg_health", sheet("H1") modify - - * Vertical labels - sum n, meanonly - local N = r(max)+1 - - forvalue i = 2/`N' { - local j = `i' - 1 - putexcel A`i' = v1[`j'] - } - - * Horizontal labels - sum n, meanonly - local N = r(max) + 1 // Adjusted since we're working across columns - - forvalues j = 1/`N' { - local n = `j'+2 // Shift by 2 to start from column C - local col "" - - while `n' > 0 { - local rem = mod(`n' - 1, 26) - local col = char(65 + `rem') + "`col'" - local n = floor((`n' - 1)/26) - } - - putexcel `col'1 = v1[`j'] - } - - *Clean up - cap erase "$dir_results/temp_labels.txt" - -restore - - -* Export model fit statistics -putexcel set "$dir_results/reg_health", sheet("Gof") modify - -putexcel A3 = "H1 - Health status", bold - -putexcel A5 = "Pseudo R-squared" -putexcel B5 = r2_p -putexcel A6 = "N" -putexcel B6 = N_sample - -* Clean up -drop in_sample p1 p2 p3 p4 p5 -scalar drop _all -matrix drop _all +process_gologit, domain("health") process("H1") sheet("H1") /// + title("Process H1: Self Rated Health") /// + gofrow(3) goflabel("H1 - Self-rated health") /// + outcomes(5) /// + ifcond("${h1_if_condition}") /**************** H2: PROBABILITY LONG-TERM SICK OR DISABLED ******************/ display "${h2_if_condition}" probit dlltsd01 i.Dgn Dag Dag_sq /// - Deh_c4_Medium Deh_c4_Low Deh_c4_Na /// - L_Ydses_c5_Q2 L_Ydses_c5_Q3 L_Ydses_c5_Q4 L_Ydses_c5_Q5 /// - L_Dhe_pcs L_Dhe_mcs /// - L_Dlltsd01 /// - L_Dhhtp_c4_CoupleChildren L_Dhhtp_c4_SingleNoChildren L_Dhhtp_c4_SingleChildren /// - $regions Year_transformed Y2020 Y2021 $ethnicity /// - if ${h2_if_condition} [pw = dwt], vce(robust) - - - * raw results -matrix results = r(table) -matrix results = results[1..6,1...]' -putexcel set "$dir_raw_results/health/health", sheet("Process H2") 
modify -putexcel A3 = matrix(results), names nformat(number_d2) -putexcel J4 = matrix(e(V)) -outreg2 stats(coef se pval) using "$dir_raw_results/health/H2.doc", replace /// -title("Process H2b: Probit regression estimates for being long-term sick or disabled - people aged 16+ not in continuous education") /// - ctitle(long-term sick or disabled) label side dec(2) noparen addstat(R2, e(r2_p), Chi2, e(chi2), Log-likelihood, e(ll)) - -gen in_sample = e(sample) - -predict p - -* Save sample for later use (internal validation) -save "$dir_validation_data/H2_sample", replace - -* Store model summary statistics -scalar r2_p = e(r2_p) -scalar N_sample = e(N) -scalar chi2 = e(chi2) -scalar ll = e(ll) - -* Store results in Excel - -* Store estimates in matrices -matrix b = e(b) -matrix V = e(V) - -* Eliminate rows and columns containing zeros (baseline cats) -mata: - // Call matrices into mata - V = st_matrix("V") - b = st_matrix("b") - - // Find which coefficients are nonzero - keep = (b :!= 0) - - // Eliminate zeros - b_trimmed = select(b, keep) - V_trimmed = select(V, keep) - V_trimmed = select(V_trimmed', keep)' - - // Inspection - b_trimmed - V_trimmed - - // Return to Stata - st_matrix("b_trimmed", b_trimmed') - st_matrix("V_trimmed", V_trimmed) - st_matrix("nonzero_b_flag", keep) -end - -* Eigenvalue tests for var-cov invertablility in SimPaths -matrix symeigen X lambda = V_trimmed - -scalar max_eig = lambda[1,1] - -scalar min_ratio = lambda[1, colsof(lambda)] / max_eig - -* Outcome of max eigenvalue test -if max_eig < 1.0e-12 { - - display as error "CRITICAL ERROR: Maximum eigenvalue is too small (`max_eig')." - display as error "The Variance-Covariance matrix is likely singular." - exit 999 - -} - -display "Stability Check Passed: Max Eigenvalue is " max_eig - -* Outcome of eigenvalue ratio test -if min_ratio < 1.0e-12 { - - display as error "Matrix is ill-conditioned. Min/Max ratio: " min_ratio - exit 506 - -} - -display "Stability Check Passed. 
Min/Max ratio: " min_ratio - - -* Export into Excel -putexcel set "$dir_results/reg_health", sheet("H2") modify -putexcel B2 = matrix(b_trimmed) -putexcel C2 = matrix(V_trimmed) - - -* Labels -putexcel set "$dir_results/reg_health", sheet("H2") modify - -putexcel A1 = "REGRESSOR" -putexcel B1 = "COEFFICIENT" - - -* Mata: extract and clean labels -mata: - // Import matrices - nonzero_b_flag = st_matrix("nonzero_b_flag")' - stripe = st_matrixcolstripe("e(b)") - - // Extract varnames from stripe (2nd column) - varnames = stripe[.,2] - varnames_no_bl = select(varnames, nonzero_b_flag :== 1) - - // Clean label vector - labels_no_bl = usubinstr(varnames_no_bl, "1.", "", 1) - labels_no_bl = regexr(labels_no_bl, "^_cons", "Constant") - labels_no_bl = regexm(labels_no_bl, "^L\\.") :* (regexr(labels_no_bl, "^L\\.", "") :+ "_L1") :+ /// - (!regexm(labels_no_bl, "^L\\.") :* labels_no_bl) - labels_no_bl = regexm(labels_no_bl, "^1L\\.") :* (regexr(labels_no_bl, "^1L\\.", "") :+ "_L1") :+ /// - (!regexm(labels_no_bl, "^1L\\.") :* labels_no_bl) - labels_no_bl = regexr(labels_no_bl, "_Dgn_L1$", "_Dgn") - - // Save as macro for writing labels from Stata - st_local("nice_labels", invtokens(labels_no_bl')) -end - -* Save cleaned labels into your original file -capture file close labelout -file open labelout using "$dir_results/temp_labels.txt", write replace -file write labelout "v1" _n // header for import -foreach lbl in `nice_labels' { - file write labelout "`lbl'" _n -} -file close labelout - -* Import cleaned labels from your file -import delimited "$dir_results/temp_labels.txt", clear varnames(1) encoding(utf8) -gen n = _n - -* Export to Excel (vertical layout in column A) -putexcel set "$dir_results/reg_health", sheet("H2") modify -summarize n, meanonly -local N = r(max) + 1 -forvalue i = 2/`N' { - local j = `i' - 1 - putexcel A`i' = v1[`j'] -} - -* Export to Excel (horizontal layout in row 1, starting at column C) -forvalues j = 1/`N' { - local n = `j' + 2 // shift index: col 
C = 3 - local col "" - local nn = `n' - while `nn' > 0 { - local rem = mod(`nn' - 1, 26) - local col = char(65 + `rem') + "`col'" - local nn = floor((`nn' - 1)/26) - } - putexcel `col'1 = v1[`j'] -} - -* Clean up original file -erase "$dir_results/temp_labels.txt" - - -* Export model fit statistics -putexcel set "$dir_results/reg_health", sheet("Gof") modify - -putexcel A15 = "H2-Long-term sick/disabled or on disability benefits", bold -putexcel A17 = "Pseudo R-squared" -putexcel B17 = r2_p -putexcel A18 = "N" -putexcel B18 = N_sample -putexcel E17 = "Chi^2" -putexcel F17 = chi2 -putexcel E18 = "Log likelihood" -putexcel F18 = ll + Deh_c4_Medium Deh_c4_Low Deh_c4_Na /// + L_Ydses_c5_Q2 L_Ydses_c5_Q3 L_Ydses_c5_Q4 L_Ydses_c5_Q5 /// + L_Dhe_pcs L_Dhe_mcs /// + L_Dlltsd01 /// + L_Dhhtp_c4_CoupleChildren L_Dhhtp_c4_SingleNoChildren /// + L_Dhhtp_c4_SingleChildren $regions Year_transformed Y2020 Y2021 /// + $ethnicity /// + if ${h2_if_condition} [pw=${weight}], vce(robust) + +process_regression, domain("health") process("H2") sheet("H2") /// + title("Process H2: Prob.disabled or long term sick") /// + gofrow(7) goflabel("H2 - Disabled or long term sick") /// + ifcond("${h2_if_condition}") probit + -* Clean up -//drop in_sample p -scalar drop _all -matrix drop _all - - -capture log close +display "Self-rated health analysis complete!" 
+ +capture log close diff --git a/input/InitialPopulations/compile/RegressionEstimates/reg_health_mental.do b/input/InitialPopulations/compile/RegressionEstimates/reg_health_mental.do index 0b3fd33aa..dd0076a2a 100644 --- a/input/InitialPopulations/compile/RegressionEstimates/reg_health_mental.do +++ b/input/InitialPopulations/compile/RegressionEstimates/reg_health_mental.do @@ -1,9 +1,9 @@ ******************************************************************************** -* PROJECT: UC and mental health +* PROJECT: SimPaths UK * SECTION: Health and wellbeing * OBJECT: Health status and Disability -* AUTHORS: Andy Baxter -* LAST UPDATE: 17 Feb 2026 +* AUTHORS: Andy Baxter, Ashley Burdett +* LAST UPDATE: 26 Mar 2026 (AB) * COUNTRY: UK * * NOTES: @@ -15,680 +15,172 @@ set mem 200m set maxvar 30000 -******************************************************************* +********************************** SET LOG FILE ******************************** cap log close log using "${dir_log}/reg_health_mental.log", replace -******************************************************************* -/********************************* PREPARE DATA *******************************/ -use ${estimation_sample}, clear +****************************** SAMPLE GLOBALS ********************************** +* For master file -* Set data -xtset idperson swv -sort idperson swv +global HM1_L_if_condition "" +global HM2_Females_L_if_condition "dag >= 25 & dag <= 64 & dgn == 0" +global HM2_Males_L_if_condition "dag >= 25 & dag <= 64 & dgn == 1" +global HM1_C_if_condition "stm != 20 & stm != 21 & dag >= 25 & dag <= 64 & swv != 12" +global HM2_Females_C_if_condition "dag >= 25 & dag <= 64 & dgn == 0" +global HM2_Males_C_if_condition "dag >= 25 & dag <= 64 & dgn == 1" -* Adjust variables -do "${dir_do}/variable_update.do" -/* DP: Household income/poverty/employment transition variables are moved to variable_update.do */ -* Remove children -drop if dag < 16 +/******************************* SET EXCEL FILE 
*******************************/ -********************************************************************** -* HM1_L: GHQ12 score 0-36 of all working-age adults - baseline effects * -********************************************************************** +putexcel set "$dir_results/reg_health_mental", sheet("Info") replace +putexcel A1 = "Description:" +putexcel B1 = "Model parameters governing projection of mental health" +putexcel A2 = "Authors:" +putexcel B2 = "Andy Baxter, Ashley Burdett" +putexcel A3 = "Last edit: 26 Mar 2026 (AB)" -reg dhm /// -L.i.dhh_owned L.i.dcpst L.dnc L.dhe_pcs L.ib8.drgn L.i.ydses_c5 L.dlltsd01 L.dhm /// -L.dag L.dagsq i.deh_c3 i.dot i.dgn stm /// -[pweight=${weight}] /// -, vce(cluster idperson) +putexcel A5 = "Process:", bold +putexcel B5 = "Description:", bold - * save raw results -matrix results = r(table) -matrix results = results[1..6,1...]' -putexcel set "$dir_raw_results/health_mental/health_mental", sheet("HM1_L") replace -putexcel A3 = matrix(results), names nformat(number_d2) -putexcel J4 = matrix(e(V)) -gen in_sample = e(sample) +putexcel A10 = "Notes:", bold -predict p -save "$dir_validation_data/HM1_L_sample", replace +putexcel set "$dir_results/reg_health_mental", sheet("Gof") modify +putexcel A1 = "Goodness of fit", bold -scalar r2_p = e(r2_p) -scalar N = e(N) -scalar rmse = e(rmse) -scalar chi2 = e(chi2) -scalar ll = e(ll) +/********************************* PREPARE DATA *******************************/ +* Load data +use "${estimation_sample}", clear -* Results +* Set data +xtset idperson swv +sort idperson swv -* Note: Zeros values are eliminated - -matrix b = e(b) -matrix V = e(V) +* Adjust variables +do "${dir_do}/variable_update.do" +/* DP: Household income/poverty/employment transition variables are moved to variable_update.do */ +* Remove children +drop if dag < 16 -* Store variance-covariance matrix -preserve +/********************************** ESTIMATION ********************************/ -putexcel set 
"$dir_raw_results/health_mental/var_cov", sheet("var_cov") replace -putexcel A1 = matrix(V) +* Run Stata programs to produce Excel file +do "${dir_do}/programs.do" -import excel "$dir_raw_results/health_mental/var_cov", sheet("var_cov") clear -describe -local no_vars = `r(k)' - -forvalues i = 1/2 { - egen row_sum = rowtotal(*) - drop if row_sum == 0 - drop row_sum - xpose, clear -} - -mkmat v*, matrix(var) -putexcel set "$dir_results/reg_health_mental", sheet("HM1_L", replace) modify -putexcel C2 = matrix(var) - -restore - - -* Store estimated coefficients - -// Initialize a counter for non-zero coefficients -local non_zero_count = 0 -//local names : colnames b - -// Loop through each element in `b` to count non-zero coefficients -forvalues i = 1/`no_vars' { - if (b[1, `i'] != 0) { - local non_zero_count = `non_zero_count' + 1 - } -} - -// Create a new row vector to hold only non-zero coefficients -matrix nonzero_b = J(1, `non_zero_count', .) - -// Populate nonzero_b with non-zero coefficients from b -local index = 1 -forvalues i = 1/`no_vars' { - if (b[1, `i'] != 0) { - matrix nonzero_b[1, `index'] = b[1, `i'] - local index = `index' + 1 - } -} - -putexcel set "$dir_results/reg_health_mental", sheet("HM1_L") modify -putexcel A1 = matrix(nonzero_b'), names nformat(number_d2) - - -* Labelling - -putexcel A1 = "REGRESSOR" -putexcel A2 = "D_Home_owner_L1" -putexcel A3 = "Dcpst_Single_L1" -putexcel A4 = "Dnc_L1" -putexcel A5 = "Dhe_pcs_L1" -putexcel A6 = "UKC" -putexcel A7 = "UKD" -putexcel A8 = "UKE" -putexcel A9 = "UKF" -putexcel A10 = "UKG" -putexcel A11 = "UKH" -putexcel A12 = "UKJ" -putexcel A13 = "UKK" -putexcel A14 = "UKL" -putexcel A15 = "UKM" -putexcel A16 = "UKN" -putexcel A17 = "Ydses_c5_Q2_L1" -putexcel A18 = "Ydses_c5_Q3_L1" -putexcel A19 = "Ydses_c5_Q4_L1" -putexcel A20 = "Ydses_c5_Q5_L1" -putexcel A21 = "Dlltsd01_L1" -putexcel A22 = "Dhm_L1" -putexcel A23 = "Dag_L1" -putexcel A24 = "Dag_sq_L1" -putexcel A25 = "Deh_c3_Medium" -putexcel A26 = "Deh_c3_Low" 
-putexcel A27 = "EthnicityAsian" -putexcel A28 = "EthnicityBlack" -putexcel A29 = "EthnicityOther" -putexcel A30 = "Dgn" -putexcel A31 = "Year_transformed" -putexcel A32 = "Constant" - -putexcel B1 = "COEFFICIENT" -putexcel C1 = "D_Home_owner_L1" -putexcel D1 = "Dcpst_Single_L1" -putexcel E1 = "Dnc_L1" -putexcel F1 = "Dhe_pcs_L1" -putexcel G1 = "UKC" -putexcel H1 = "UKD" -putexcel I1 = "UKE" -putexcel J1 = "UKF" -putexcel K1 = "UKG" -putexcel L1 = "UKH" -putexcel M1 = "UKJ" -putexcel N1 = "UKK" -putexcel O1 = "UKL" -putexcel P1 = "UKM" -putexcel Q1 = "UKN" -putexcel R1 = "Ydses_c5_Q2_L1" -putexcel S1 = "Ydses_c5_Q3_L1" -putexcel T1 = "Ydses_c5_Q4_L1" -putexcel U1 = "Ydses_c5_Q5_L1" -putexcel V1 = "Dlltsd01_L1" -putexcel W1 = "Dhm_L1" -putexcel X1 = "Dag_L1" -putexcel Y1 = "Dag_sq_L1" -putexcel Z1 = "Deh_c3_Medium" -putexcel AA1 = "Deh_c3_Low" -putexcel AB1 = "EthnicityAsian" -putexcel AC1 = "EthnicityBlack" -putexcel AD1 = "EthnicityOther" -putexcel AE1 = "Dgn" -putexcel AF1 = "Year_transformed" -putexcel AG1 = "Constant" - -* save RMSE -putexcel set "$dir_results/reg_RMSE.xlsx", sheet("UK") modify -putexcel A13 = ("HM1_L") B13 = rmse +************************************************************************ +* HM1_L: GHQ12 score 0-36 of all working-age adults - baseline effects * +************************************************************************ -drop in_sample p +reg dhm /// + L.i.dhh_owned L.i.dcpst L.dnc L.dhe_pcs L.ib8.drgn L.i.ydses_c5 /// + L.dlltsd01 L.dhm /// + L.dag L.dagsq i.deh_c3 i.dot i.dgn stm [pw=${weight}], /// + vce(cluster idperson) + +process_regression, domain("health_mental") process("HM1_L") sheet("HM1_L") /// + title("Process HM1_L: Mental health score") /// + gofrow(3) goflabel("HM1_L ") /// + ifcond("${HM1_L_if_condition}") -scalar drop r2_p N chi2 ll +* Save RMSE +scalar rmse = e(rmse) +putexcel set "$dir_results/reg_RMSE.xlsx", sheet("UK") modify +putexcel A13 = ("HM1_L") B13 = rmse +scalar drop rmse + 
*************************************************************** * HM2_Females_L: GHQ12 Score 0-36 - causal employment effects * *************************************************************** -*Stage 2 -*Female -reghdfe dhm /// -ib11.exp_emp i.exp_poverty i.exp_incchange D.log_income financial_distress /// -y2020 y2021 /// -L.i.dhh_owned L.i.dcpst L.dnc L.dhe_pcs L.ib8.drgn L.i.ydses_c5 L.dlltsd01 L.dhm /// -L.dag L.dagsq i.deh_c3 stm /// -if dag>=25 & dag<=64 & dgn==0 /// -[pweight=${weight}] /// -, absorb(idperson) vce(cluster idperson) - - - * save raw results -matrix results = r(table) -matrix results = results[1..6,1..10]' -putexcel set "$dir_raw_results/health_mental/health_mental", sheet("HM2_Females_L") replace -putexcel A3 = matrix(results), names nformat(number_d2) -putexcel J4 = matrix(e(V)) - -gen in_sample = e(sample) +* Stage 2 - Female -predict p - -save "$dir_validation_data/HM2_Females_L_sample", replace - - -scalar r2_p = e(r2_p) -scalar N = e(N) -scalar rmse = e(rmse) -scalar chi2 = e(chi2) -scalar ll = e(ll) - - -* Results - -* Note: Zeros values are eliminated +* List of variables to be reported in excel +local vars_for_excel "ib11.exp_emp i.exp_poverty i.exp_incchange D.log_income financial_distress y2020 y2021" +* Number of estimates to be reported in excel +local n_vars_for_excel "11" -matrix b = e(b) -matrix V = e(V) -matrix V = V[1..14,1..14] - -forvalues i = 1/14 { - forvalues j = 1/14 { - if `i' == `j' { - continue - } - matrix V[`i',`j'] = 0 - } -} - -* Store variance-covariance matrix - -preserve - -putexcel set "$dir_raw_results/health_mental/var_cov", sheet("var_cov") replace -putexcel A1 = matrix(V) - -import excel "$dir_raw_results/health_mental/var_cov", sheet("var_cov") clear - -describe -local no_vars = `r(k)' - -forvalues i = 1/2 { - egen row_sum = rowtotal(*) - drop if row_sum == 0 - drop row_sum - xpose, clear -} - -mkmat v*, matrix(var) -putexcel set "$dir_results/reg_health_mental", sheet("HM2_Females_L", replace) modify 
-putexcel C2 = matrix(var) - -restore - - -* Store estimated coefficients - -// Initialize a counter for non-zero coefficients -local non_zero_count = 0 -//local names : colnames b - -// Loop through each element in `b` to count non-zero coefficients -forvalues i = 1/`no_vars' { - if (b[1, `i'] != 0) { - local non_zero_count = `non_zero_count' + 1 - } -} - -// Create a new row vector to hold only non-zero coefficients -matrix nonzero_b = J(1, `non_zero_count', .) - -// Populate nonzero_b with non-zero coefficients from b -local index = 1 -forvalues i = 1/`no_vars' { - if (b[1, `i'] != 0) { - matrix nonzero_b[1, `index'] = b[1, `i'] - local index = `index' + 1 - } -} - -putexcel set "$dir_results/reg_health_mental", sheet("HM2_Females_L") modify -putexcel A1 = matrix(nonzero_b'), names nformat(number_d2) - -* Labelling - -putexcel A1 = "REGRESSOR" -putexcel A2 = "EmployedToUnemployed" -putexcel A3 = "UnemployedToEmployed" -putexcel A4 = "PersistentUnemployed" -putexcel A5 = "NonPovertyToPoverty" -putexcel A6 = "PovertyToNonPoverty" -putexcel A7 = "PersistentPoverty" -putexcel A8 = "RealIncomeChange" -putexcel A9 = "RealIncomeDecrease_D" -putexcel A10 = "FinancialDistress" -putexcel A11 = "Covid_2020_D" -putexcel A12 = "Covid_2021_D" - - -putexcel B1 = "COEFFICIENT" -putexcel C1 = "EmployedToUnemployed" -putexcel D1 = "UnemployedToEmployed" -putexcel E1 = "PersistentUnemployed" -putexcel F1 = "NonPovertyToPoverty" -putexcel G1 = "PovertyToNonPoverty" -putexcel H1 = "PersistentPoverty" -putexcel I1 = "RealIncomeChange" -putexcel J1 = "RealIncomeDecrease_D" -putexcel K1 = "FinancialDistress" -putexcel L1 = "Covid_2020_D" -putexcel M1 = "Covid_2021_D" +reghdfe dhm /// + `vars_for_excel' /// + L.i.dhh_owned L.i.dcpst L.dnc L.dhe_pcs L.ib8.drgn L.i.ydses_c5 /// + L.dlltsd01 L.dhm /// + L.dag L.dagsq i.deh_c3 stm /// + if ${HM2_Females_L_if_condition} [pw=${weight}], /// + absorb(idperson) vce(cluster idperson) -* save RMSE +process_regression, domain("health_mental") 
process("HM2_Females_L") /// + sheet("HM2_Females_L") /// + title("Process HM2_Females_L: Mental health score") /// + gofrow(7) goflabel("HM2_Females_L ") /// + ifcond("${HM2_Females_L_if_condition}") /// + gformula maxestimates(`n_vars_for_excel') + +* Save RMSE +scalar rmse = e(rmse) putexcel set "$dir_results/reg_RMSE.xlsx", sheet("UK") modify putexcel A14 = ("HM2_Females_L") B14 = rmse - -drop in_sample p -scalar drop r2_p N chi2 ll +scalar drop rmse + *************************************************************** * HM2_Males_L: GHQ12 Score 0-36 - causal employment effects * *************************************************************** +* Stage 2 - Male -*Stage 2 -*Male -reghdfe dhm /// -ib11.exp_emp i.exp_poverty i.exp_incchange D.log_income financial_distress /// -y2020 y2021 /// -L.i.dhh_owned L.i.dcpst L.dnc L.dhe_pcs L.ib8.drgn L.i.ydses_c5 L.dlltsd01 L.dhm /// -L.dag L.dagsq i.deh_c3 stm /// -if dag>=25 & dag<=64 & dgn==1 /// -[pweight=${weight}] /// -, absorb(idperson) vce(cluster idperson) - - - * save raw results -matrix results = r(table) -matrix results = results[1..6,1..10]' -putexcel set "$dir_raw_results/health_mental/health_mental", sheet("HM2_Males_L") replace -putexcel A3 = matrix(results), names nformat(number_d2) -putexcel J4 = matrix(e(V)) - -gen in_sample = e(sample) - -predict p - -save "$dir_validation_data/HM2_Males_L_sample", replace - - -scalar r2_p = e(r2_p) -scalar N = e(N) -scalar rmse = e(rmse) -scalar chi2 = e(chi2) -scalar ll = e(ll) - - -* Results - -* Note: Zeros values are eliminated - -matrix b = e(b) -matrix V = e(V) -matrix V = V[1..14,1..14] - -forvalues i = 1/14 { - forvalues j = 1/14 { - if `i' == `j' { - continue - } - matrix V[`i',`j'] = 0 - } -} - -* Store variance-covariance matrix - -preserve - -putexcel set "$dir_raw_results/health_mental/var_cov", sheet("var_cov") replace -putexcel A1 = matrix(V) +* List of variables to be reported in excel +local vars_for_excel "ib11.exp_emp i.exp_poverty i.exp_incchange 
D.log_income financial_distress y2020 y2021" +* Number of estimates to be reported in excel +local n_vars_for_excel "11" -import excel "$dir_raw_results/health_mental/var_cov", sheet("var_cov") clear - -describe -local no_vars = `r(k)' - -forvalues i = 1/2 { - egen row_sum = rowtotal(*) - drop if row_sum == 0 - drop row_sum - xpose, clear -} +reghdfe dhm /// + `vars_for_excel' /// + L.i.dhh_owned L.i.dcpst L.dnc L.dhe_pcs L.ib8.drgn L.i.ydses_c5 /// + L.dlltsd01 L.dhm L.dag L.dagsq i.deh_c3 stm /// + if ${HM2_Males_L_if_condition} [pw=${weight}] /// + , absorb(idperson) vce(cluster idperson) -mkmat v*, matrix(var) -putexcel set "$dir_results/reg_health_mental", sheet("HM2_Males_L", replace) modify -putexcel C2 = matrix(var) - -restore - - -* Store estimated coefficients - -// Initialize a counter for non-zero coefficients -local non_zero_count = 0 -//local names : colnames b - -// Loop through each element in `b` to count non-zero coefficients -forvalues i = 1/`no_vars' { - if (b[1, `i'] != 0) { - local non_zero_count = `non_zero_count' + 1 - } -} - -// Create a new row vector to hold only non-zero coefficients -matrix nonzero_b = J(1, `non_zero_count', .) 
- -// Populate nonzero_b with non-zero coefficients from b -local index = 1 -forvalues i = 1/`no_vars' { - if (b[1, `i'] != 0) { - matrix nonzero_b[1, `index'] = b[1, `i'] - local index = `index' + 1 - } -} - -putexcel set "$dir_results/reg_health_mental", sheet("HM2_Males_L") modify -putexcel A1 = matrix(nonzero_b'), names nformat(number_d2) - -* Labelling - -putexcel A1 = "REGRESSOR" -putexcel A2 = "EmployedToUnemployed" -putexcel A3 = "UnemployedToEmployed" -putexcel A4 = "PersistentUnemployed" -putexcel A5 = "NonPovertyToPoverty" -putexcel A6 = "PovertyToNonPoverty" -putexcel A7 = "PersistentPoverty" -putexcel A8 = "RealIncomeChange" -putexcel A9 = "RealIncomeDecrease_D" -putexcel A10 = "FinancialDistress" -putexcel A11 = "Covid_2020_D" -putexcel A12 = "Covid_2021_D" - - -putexcel B1 = "COEFFICIENT" -putexcel C1 = "EmployedToUnemployed" -putexcel D1 = "UnemployedToEmployed" -putexcel E1 = "PersistentUnemployed" -putexcel F1 = "NonPovertyToPoverty" -putexcel G1 = "PovertyToNonPoverty" -putexcel H1 = "PersistentPoverty" -putexcel I1 = "RealIncomeChange" -putexcel J1 = "RealIncomeDecrease_D" -putexcel K1 = "FinancialDistress" -putexcel L1 = "Covid_2020_D" -putexcel M1 = "Covid_2021_D" - -* save RMSE +process_regression, domain("health_mental") process("HM2_Males_L") /// + sheet("HM2_Males_L") /// + title("Process HM2_Males_L: Mental health score") /// + gofrow(11) goflabel("HM2_Males_L ") /// + ifcond("${HM2_Males_L_if_condition}") /// + gformula maxestimates(`n_vars_for_excel') + +* Save RMSE +scalar rmse = e(rmse) putexcel set "$dir_results/reg_RMSE.xlsx", sheet("UK") modify putexcel A15 = ("HM2_Males_L") B15 = rmse - -drop in_sample p -scalar drop r2_p N chi2 ll +scalar drop rmse -********************************************************************** +************************************************************************ * HM1_C: GHQ12 score 0-12 of all working-age adults - baseline effects * 
-********************************************************************** +************************************************************************ * New ordered logistic regression model, reflecting observed distributions ologit scghq2_dv /// -L.i.dhh_owned L.i.dcpst L.dnc L.dhe_pcs L.ib8.drgn L.i.ydses_c5 L.dlltsd01 L.scghq2_dv /// -L.dag L.dagsq i.deh_c3 i.dot i.dgn stm /// -if stm!=20 & stm!=21 & dag>=25 & dag<=64 & swv!=12 /// -[pweight=${weight}] /// -, vce(cluster idperson) - - * save raw results -matrix results = r(table) -matrix results = results[1..6,1...]' -putexcel set "$dir_raw_results/health_mental/health_mental", sheet("HM1_C") replace -putexcel A3 = matrix(results), names nformat(number_d2) -putexcel J4 = matrix(e(V)) - -gen in_sample = e(sample) - -predict p - -save "$dir_validation_data/HM1_C_sample", replace - - -scalar r2_p = e(r2_p) -scalar N = e(N) -scalar rmse = e(rmse) -scalar chi2 = e(chi2) -scalar ll = e(ll) - - -* Results - -* Note: Zeros values are eliminated + L.i.dhh_owned L.i.dcpst L.dnc L.dhe_pcs L.ib8.drgn L.i.ydses_c5 /// + L.dlltsd01 L.scghq2_dv /// + L.dag L.dagsq i.deh_c3 i.dot i.dgn stm /// + if ${HM1_C_if_condition} /// + [pw=${weight}] /// + , vce(cluster idperson) + +process_ologit, domain("health_mental") process("HM1_C") /// + sheet("HM1_C") /// + title("Process HM1_C: Mental health score") /// + gofrow(15) goflabel("HM1_C ") /// + ifcond("${HM1_C_if_condition}") -matrix b = e(b) -matrix V = e(V) - - -* Store variance-covariance matrix - -preserve - -putexcel set "$dir_raw_results/health_mental/var_cov", sheet("var_cov") replace -putexcel A1 = matrix(V) - -import excel "$dir_raw_results/health_mental/var_cov", sheet("var_cov") clear - -describe -local no_vars = `r(k)' - -forvalues i = 1/2 { - egen row_sum = rowtotal(*) - drop if row_sum == 0 - drop row_sum - xpose, clear -} - -mkmat v*, matrix(var) -putexcel set "$dir_results/reg_health_mental", sheet("HM1_C", replace) modify -putexcel C2 = matrix(var) - -restore - - -* 
Store estimated coefficients - -// Initialize a counter for non-zero coefficients -local non_zero_count = 0 -//local names : colnames b - -// Loop through each element in `b` to count non-zero coefficients -forvalues i = 1/`no_vars' { - if (b[1, `i'] != 0) { - local non_zero_count = `non_zero_count' + 1 - } -} - -// Create a new row vector to hold only non-zero coefficients -matrix nonzero_b = J(1, `non_zero_count', .) - -// Populate nonzero_b with non-zero coefficients from b -local index = 1 -forvalues i = 1/`no_vars' { - if (b[1, `i'] != 0) { - matrix nonzero_b[1, `index'] = b[1, `i'] - local index = `index' + 1 - } -} - -putexcel set "$dir_results/reg_health_mental", sheet("HM1_C") modify -putexcel A1 = matrix(nonzero_b'), names nformat(number_d2) - - -* Labelling - -putexcel A1 = "REGRESSOR" -putexcel A2 = "D_Home_owner_L1" -putexcel A3 = "Dcpst_Single_L1" -putexcel A4 = "Dnc_L1" -putexcel A5 = "Dhe_pcs_L1" -putexcel A6 = "UKC" -putexcel A7 = "UKD" -putexcel A8 = "UKE" -putexcel A9 = "UKF" -putexcel A10 = "UKG" -putexcel A11 = "UKH" -putexcel A12 = "UKJ" -putexcel A13 = "UKK" -putexcel A14 = "UKL" -putexcel A15 = "UKM" -putexcel A16 = "UKN" -putexcel A17 = "Ydses_c5_Q2_L1" -putexcel A18 = "Ydses_c5_Q3_L1" -putexcel A19 = "Ydses_c5_Q4_L1" -putexcel A20 = "Ydses_c5_Q5_L1" -putexcel A21 = "Dlltsd01_L1" -putexcel A22 = "Dhm_L1" -putexcel A23 = "Dag_L1" -putexcel A24 = "Dag_sq_L1" -putexcel A25 = "Deh_c3_Medium" -putexcel A26 = "Deh_c3_Low" -putexcel A27 = "EthnicityAsian" -putexcel A28 = "EthnicityBlack" -putexcel A29 = "EthnicityOther" -putexcel A30 = "Dgn" -putexcel A31 = "Year_transformed" -putexcel A32 = "Cut1" -putexcel A33 = "Cut2" -putexcel A34 = "Cut3" -putexcel A35 = "Cut4" -putexcel A36 = "Cut5" -putexcel A37 = "Cut6" -putexcel A38 = "Cut7" -putexcel A39 = "Cut8" -putexcel A40 = "Cut9" -putexcel A41 = "Cut10" -putexcel A42 = "Cut11" -putexcel A43 = "Cut12" - -putexcel B1 = "COEFFICIENT" -putexcel C1 = "D_Home_owner_L1" -putexcel D1 = "Dcpst_Single_L1" 
-putexcel E1 = "Dnc_L1" -putexcel F1 = "Dhe_pcs_L1" -putexcel G1 = "UKC" -putexcel H1 = "UKD" -putexcel I1 = "UKE" -putexcel J1 = "UKF" -putexcel K1 = "UKG" -putexcel L1 = "UKH" -putexcel M1 = "UKJ" -putexcel N1 = "UKK" -putexcel O1 = "UKL" -putexcel P1 = "UKM" -putexcel Q1 = "UKN" -putexcel R1 = "Ydses_c5_Q2_L1" -putexcel S1 = "Ydses_c5_Q3_L1" -putexcel T1 = "Ydses_c5_Q4_L1" -putexcel U1 = "Ydses_c5_Q5_L1" -putexcel V1 = "Dlltsd01_L1" -putexcel W1 = "Dhm_L1" -putexcel X1 = "Dag_L1" -putexcel Y1 = "Dag_sq_L1" -putexcel Z1 = "Deh_c3_Medium" -putexcel AA1 = "Deh_c3_Low" -putexcel AB1 = "EthnicityAsian" -putexcel AC1 = "EthnicityBlack" -putexcel AD1 = "EthnicityOther" -putexcel AE1 = "Dgn" -putexcel AF1 = "Year_transformed" -putexcel AG1 = "Cut1" -putexcel AH1 = "Cut2" -putexcel AI1 = "Cut3" -putexcel AJ1 = "Cut4" -putexcel AK1 = "Cut5" -putexcel AL1 = "Cut6" -putexcel AM1 = "Cut7" -putexcel AN1 = "Cut8" -putexcel AO1 = "Cut9" -putexcel AP1 = "Cut10" -putexcel AQ1 = "Cut11" -putexcel AR1 = "Cut12" - -/* save RMSE - not strictly needed for ologit predictions -putexcel set "$dir_results/reg_RMSE.xlsx", sheet("UK") modify -putexcel A16 = ("HM1_C") B16 = rmse -*/ - -drop in_sample p -scalar drop r2_p N chi2 ll *************************************************************** * HM2_Females_C: GHQ12 Score 0-12 - causal employment effects * @@ -699,284 +191,69 @@ scalar drop r2_p N chi2 ll gen RealIncomeDecrease_D = log_income - L.log_income gen scghq2_dv_L1 = L.scghq2_dv -*Stage 2 -*Female -reghdfe scghq2_dv /// -ib11.exp_emp i.exp_poverty i.exp_incchange RealIncomeDecrease_D financial_distress /// -y2020 y2021 /// -i.dhh_owned i.dcpst dnc dhe_pcs ib8.drgn i.ydses_c5 dlltsd01 /// -dag dagsq i.deh_c3 stm /// -if dag>=25 & dag<=64 & dgn==0 /// -, absorb(idperson) vce(cluster idperson) - - - * save raw results -matrix results = r(table) -matrix results = results[1..6,1..10]' -putexcel set "$dir_raw_results/health_mental/health_mental", sheet("HM2_Females_C") replace -putexcel A3 
= matrix(results), names nformat(number_d2) -putexcel J4 = matrix(e(V)) - -gen in_sample = e(sample) +* Stage 2 - Female -predict p - -save "$dir_validation_data/HM2_Females_C_sample", replace - - -scalar r2_p = e(r2_p) -scalar N = e(N) -scalar rmse = e(rmse) -scalar chi2 = e(chi2) -scalar ll = e(ll) +* List of variables to be reported in excel +local vars_for_excel "ib11.exp_emp i.exp_poverty i.exp_incchange RealIncomeDecrease_D financial_distress y2020 y2021" +* Number of estimates to be reported in excel +local n_vars_for_excel "11" - -* Results - -* Note: Zeros values are eliminated - -matrix b = e(b) -matrix V = e(V) -matrix V = V[1..14,1..14] - -forvalues i = 1/14 { - forvalues j = 1/14 { - if `i' == `j' { - continue - } - matrix V[`i',`j'] = 0 - } -} - -* Store variance-covariance matrix - -preserve - -putexcel set "$dir_raw_results/health_mental/var_cov", sheet("var_cov") replace -putexcel A1 = matrix(V) - -import excel "$dir_raw_results/health_mental/var_cov", sheet("var_cov") clear - -describe -local no_vars = `r(k)' +reghdfe scghq2_dv /// + `vars_for_excel' /// + i.dhh_owned i.dcpst dnc dhe_pcs ib8.drgn i.ydses_c5 dlltsd01 /// + dag dagsq i.deh_c3 stm /// + if ${HM2_Females_C_if_condition} [pw=${weight}] /// + , absorb(idperson) vce(cluster idperson) -forvalues i = 1/2 { - egen row_sum = rowtotal(*) - drop if row_sum == 0 - drop row_sum - xpose, clear -} +process_regression, domain("health_mental") process("HM2_Females_C") /// + sheet("HM2_Females_C") /// + title("Process HM2_Females_C: Mental health score") /// + gofrow(19) goflabel("HM2_Females_C ") /// + ifcond("${HM2_Females_C_if_condition}") /// + gformula maxestimates(`n_vars_for_excel') -mkmat v*, matrix(var) -putexcel set "$dir_results/reg_health_mental", sheet("HM2_Females_C", replace) modify -putexcel C2 = matrix(var) - -restore - - -* Store estimated coefficients - -// Initialize a counter for non-zero coefficients -local non_zero_count = 0 -//local names : colnames b - -// Loop through each 
element in `b` to count non-zero coefficients -forvalues i = 1/`no_vars' { - if (b[1, `i'] != 0) { - local non_zero_count = `non_zero_count' + 1 - } -} - -// Create a new row vector to hold only non-zero coefficients -matrix nonzero_b = J(1, `non_zero_count', .) - -// Populate nonzero_b with non-zero coefficients from b -local index = 1 -forvalues i = 1/`no_vars' { - if (b[1, `i'] != 0) { - matrix nonzero_b[1, `index'] = b[1, `i'] - local index = `index' + 1 - } -} - -putexcel set "$dir_results/reg_health_mental", sheet("HM2_Females_C") modify -putexcel A1 = matrix(nonzero_b'), names nformat(number_d2) - -* Labelling - -putexcel A1 = "REGRESSOR" -putexcel A2 = "EmployedToUnemployed" -putexcel A3 = "UnemployedToEmployed" -putexcel A4 = "PersistentUnemployed" -putexcel A5 = "NonPovertyToPoverty" -putexcel A6 = "PovertyToNonPoverty" -putexcel A7 = "PersistentPoverty" -putexcel A8 = "RealIncomeChange" -putexcel A9 = "RealIncomeDecrease_D" -putexcel A10 = "FinancialDistress" -putexcel A11 = "Covid_2020_D" -putexcel A12 = "Covid_2021_D" - - -putexcel B1 = "COEFFICIENT" -putexcel C1 = "EmployedToUnemployed" -putexcel D1 = "UnemployedToEmployed" -putexcel E1 = "PersistentUnemployed" -putexcel F1 = "NonPovertyToPoverty" -putexcel G1 = "PovertyToNonPoverty" -putexcel H1 = "PersistentPoverty" -putexcel I1 = "RealIncomeChange" -putexcel J1 = "RealIncomeDecrease_D" -putexcel K1 = "FinancialDistress" -putexcel L1 = "Covid_2020_D" -putexcel M1 = "Covid_2021_D" - -* save RMSE +* Save RMSE to row 16 (rows 18 onwards of this sheet are written by reg_health_wellbeing.do) +scalar rmse = e(rmse) putexcel set "$dir_results/reg_RMSE.xlsx", sheet("UK") modify -putexcel A16 = ("HM2_Females_C") B16 = rmse +putexcel A16 = ("HM2_Females_C") B16 = rmse +scalar drop rmse -drop in_sample p -scalar drop r2_p N chi2 ll -*************************************************************** +************************************************************* * HM2_Males_C: GHQ12 Score 0-12 - causal employment effects * -*************************************************************** - - 
-*Stage 2 -*Male -reghdfe scghq2_dv /// -ib11.exp_emp i.exp_poverty i.exp_incchange RealIncomeDecrease_D financial_distress /// -y2020 y2021 /// -i.dhh_owned i.dcpst dnc dhe_pcs ib8.drgn i.ydses_c5 dlltsd01 /// -dag dagsq i.deh_c3 stm /// -if dag>=25 & dag<=64 & dgn==1 /// -, absorb(idperson) vce(cluster idperson) +************************************************************* - * save raw results -matrix results = r(table) -matrix results = results[1..6,1..10]' -putexcel set "$dir_raw_results/health_mental/health_mental", sheet("HM2_Males_C") replace -putexcel A3 = matrix(results), names nformat(number_d2) -putexcel J4 = matrix(e(V)) +* Stage 2 - Male -gen in_sample = e(sample) +* List of variables to be reported in excel +local vars_for_excel "ib11.exp_emp i.exp_poverty i.exp_incchange RealIncomeDecrease_D financial_distress y2020 y2021" +* Number of estimates to be reported in excel +local n_vars_for_excel "11" -predict p - -save "$dir_validation_data/HM2_Males_C_sample", replace - - -scalar r2_p = e(r2_p) -scalar N = e(N) +reghdfe scghq2_dv /// + `vars_for_excel' /// + i.dhh_owned i.dcpst dnc dhe_pcs ib8.drgn i.ydses_c5 dlltsd01 /// + dag dagsq i.deh_c3 stm /// + if ${HM2_Males_C_if_condition} [pw=${weight}] /// + , absorb(idperson) vce(cluster idperson) + +process_regression, domain("health_mental") process("HM2_Males_C") /// + sheet("HM2_Males_C") /// + title("Process HM2_Males_C: Mental health score") /// + gofrow(23) goflabel("HM2_Males_C ") /// + ifcond("${HM2_Males_C_if_condition}") /// + gformula maxestimates(`n_vars_for_excel') + +* Save RMSE to row 17 (rows 18 onwards of this sheet are written by reg_health_wellbeing.do) scalar rmse = e(rmse) -scalar chi2 = e(chi2) -scalar ll = e(ll) - - -* Results - -* Note: Zeros values are eliminated +putexcel set "$dir_results/reg_RMSE.xlsx", sheet("UK") modify +putexcel A17 = ("HM2_Males_C") B17 = rmse +scalar drop rmse -matrix b = e(b) -matrix V = e(V) -matrix V = V[1..14,1..14] -forvalues i = 1/14 { - forvalues j = 1/14 { - if `i' == `j' { - continue - } - matrix V[`i',`j'] = 0 - } -} 
+display "Mental health analysis complete!" + -* Store variance-covariance matrix - -preserve - -putexcel set "$dir_raw_results/health_mental/var_cov", sheet("var_cov") replace -putexcel A1 = matrix(V) - -import excel "$dir_raw_results/health_mental/var_cov", sheet("var_cov") clear - -describe -local no_vars = `r(k)' - -forvalues i = 1/2 { - egen row_sum = rowtotal(*) - drop if row_sum == 0 - drop row_sum - xpose, clear -} +capture log close -mkmat v*, matrix(var) -putexcel set "$dir_results/reg_health_mental", sheet("HM2_Males_C", replace) modify -putexcel C2 = matrix(var) - -restore - - -* Store estimated coefficients - -// Initialize a counter for non-zero coefficients -local non_zero_count = 0 -//local names : colnames b - -// Loop through each element in `b` to count non-zero coefficients -forvalues i = 1/`no_vars' { - if (b[1, `i'] != 0) { - local non_zero_count = `non_zero_count' + 1 - } -} - -// Create a new row vector to hold only non-zero coefficients -matrix nonzero_b = J(1, `non_zero_count', .) 
- -// Populate nonzero_b with non-zero coefficients from b -local index = 1 -forvalues i = 1/`no_vars' { - if (b[1, `i'] != 0) { - matrix nonzero_b[1, `index'] = b[1, `i'] - local index = `index' + 1 - } -} - -putexcel set "$dir_results/reg_health_mental", sheet("HM2_Males_C") modify -putexcel A1 = matrix(nonzero_b'), names nformat(number_d2) - -* Labelling - -putexcel A1 = "REGRESSOR" -putexcel A2 = "EmployedToUnemployed" -putexcel A3 = "UnemployedToEmployed" -putexcel A4 = "PersistentUnemployed" -putexcel A5 = "NonPovertyToPoverty" -putexcel A6 = "PovertyToNonPoverty" -putexcel A7 = "PersistentPoverty" -putexcel A8 = "RealIncomeChange" -putexcel A9 = "RealIncomeDecrease_D" -putexcel A10 = "FinancialDistress" -putexcel A11 = "Covid_2020_D" -putexcel A12 = "Covid_2021_D" - - -putexcel B1 = "COEFFICIENT" -putexcel C1 = "EmployedToUnemployed" -putexcel D1 = "UnemployedToEmployed" -putexcel E1 = "PersistentUnemployed" -putexcel F1 = "NonPovertyToPoverty" -putexcel G1 = "PovertyToNonPoverty" -putexcel H1 = "PersistentPoverty" -putexcel I1 = "RealIncomeChange" -putexcel J1 = "RealIncomeDecrease_D" -putexcel K1 = "FinancialDistress" -putexcel L1 = "Covid_2020_D" -putexcel M1 = "Covid_2021_D" - -* save RMSE -putexcel set "$dir_results/reg_RMSE.xlsx", sheet("UK") modify -putexcel A17 = ("HM2_Males_C") B17 = rmse - -drop in_sample p -scalar drop r2_p N chi2 ll + \ No newline at end of file diff --git a/input/InitialPopulations/compile/RegressionEstimates/reg_health_wellbeing.do b/input/InitialPopulations/compile/RegressionEstimates/reg_health_wellbeing.do index f213c975f..e8edf33cf 100644 --- a/input/InitialPopulations/compile/RegressionEstimates/reg_health_wellbeing.do +++ b/input/InitialPopulations/compile/RegressionEstimates/reg_health_wellbeing.do @@ -1,9 +1,9 @@ ******************************************************************************** -* PROJECT: UC and mental health +* PROJECT: SimPaths UK * SECTION: Health and wellbeing * OBJECT: Health status and Disability 
-* AUTHORS: Andy Baxter -* LAST UPDATE: 17 Feb 2026 +* AUTHORS: Andy Baxter, Ashley Burdett +* LAST UPDATE: 26 Mar 2026 (AB) * COUNTRY: UK * * NOTES: @@ -15,1417 +15,329 @@ set mem 200m set maxvar 30000 -******************************************************************* +********************************** SET LOG FILE ******************************** cap log close log using "${dir_log}/reg_health_wellbeing.log", replace -******************************************************************* -/********************************* PREPARE DATA *******************************/ -use ${estimation_sample}, clear +****************************** SAMPLE GLOBALS ********************************** +* For master file -* Set data -xtset idperson swv -sort idperson swv +global DHE_MCS1_if_condition "" +global DHE_MCS2_Females_if_condition "dag >= 25 & dag <= 64 & dgn == 0" +global DHE_MCS2_Males_if_condition "dag >= 25 & dag <= 64 & dgn == 1" +global DHE_PCS1_if_condition "" +global DHE_PCS2_Females_if_condition "dag >= 25 & dag <= 64 & dgn == 0" +global DHE_PCS2_Males_if_condition "dag >= 25 & dag <= 64 & dgn == 1" +global DLS1_if_condition "" +global DLS2_Females_if_condition "dag >= 25 & dag <= 64 & dgn == 0" +global DLS2_Males_if_condition "dag >= 25 & dag <= 64 & dgn == 1" -* Adjust variables -do "${dir_do}/variable_update.do" -/* DP: Household income/poverty/employment transition variables are moved to variable_update.do */ -* Remove children -drop if dag < 16 +/******************************* SET EXCEL FILE *******************************/ +putexcel set "$dir_results/reg_health_wellbeing", sheet("Info") replace +putexcel A1 = "Description:", bold +putexcel B1 = "Model parameters governing projection of well being" +putexcel A2 = "Authors:" +putexcel B2 = "Andy Baxter, Ashley Burdett" +putexcel A3 = "Last edit: 26 Mar 2026 (AB)" -******************************************************************************** -* DHE_MCS1 - SF12 MCS score 0-100 of all working-age adults - 
baseline effects * -******************************************************************************** +putexcel A5 = "Process:", bold +putexcel B5 = "Description:", bold -reg dhe_mcs /// -L.i.dhh_owned L.i.dcpst L.dnc L.dhe_pcs L.ib8.drgn L.i.ydses_c5 L.dlltsd01 L.dhe_mcs /// -L.dag L.dagsq i.deh_c3 i.dot i.dgn stm /// -[pweight=${weight}] /// -, vce(cluster idperson) - * save raw results -matrix results = r(table) -matrix results = results[1..6,1...]' -putexcel set "$dir_raw_results/health_wellbeing/health_wellbeing", sheet("DHE_MCS1") replace -putexcel A3 = matrix(results), names nformat(number_d2) -putexcel J4 = matrix(e(V)) +putexcel A10 = "Notes:", bold -gen in_sample = e(sample) -predict p +putexcel set "$dir_results/reg_health_wellbeing", sheet("Gof") modify +putexcel A1 = "Goodness of fit", bold -save "$dir_validation_data/DHE_MCS1_sample", replace +/********************************* PREPARE DATA *******************************/ -scalar r2_p = e(r2_p) -scalar N = e(N) -scalar rmse = e(rmse) -scalar chi2 = e(chi2) -scalar ll = e(ll) - - -* Results - -* Note: Zeros values are eliminated - -matrix b = e(b) -matrix V = e(V) - - -* Store variance-covariance matrix - -preserve - -putexcel set "$dir_raw_results/health_wellbeing/var_cov", sheet("var_cov") replace -putexcel A1 = matrix(V) - -import excel "$dir_raw_results/health_wellbeing/var_cov", sheet("var_cov") clear - -describe -local no_vars = `r(k)' - -forvalues i = 1/2 { - egen row_sum = rowtotal(*) - drop if row_sum == 0 - drop row_sum - xpose, clear -} - -mkmat v*, matrix(var) -putexcel set "$dir_results/reg_health_wellbeing", sheet("DHE_MCS1", replace) modify -putexcel C2 = matrix(var) - -restore - - -* Store estimated coefficients - -// Initialize a counter for non-zero coefficients -local non_zero_count = 0 -//local names : colnames b - -// Loop through each element in `b` to count non-zero coefficients -forvalues i = 1/`no_vars' { - if (b[1, `i'] != 0) { - local non_zero_count = `non_zero_count' + 1 - } 
-} - -// Create a new row vector to hold only non-zero coefficients -matrix nonzero_b = J(1, `non_zero_count', .) - -// Populate nonzero_b with non-zero coefficients from b -local index = 1 -forvalues i = 1/`no_vars' { - if (b[1, `i'] != 0) { - matrix nonzero_b[1, `index'] = b[1, `i'] - local index = `index' + 1 - } -} - -putexcel set "$dir_results/reg_health_wellbeing", sheet("DHE_MCS1") modify -putexcel A1 = matrix(nonzero_b'), names nformat(number_d2) - - -* Labelling - -putexcel A1 = "REGRESSOR" -putexcel A2 = "D_Home_owner_L1" -putexcel A3 = "Dcpst_Single_L1" -putexcel A4 = "Dnc_L1" -putexcel A5 = "Dhe_pcs_L1" -putexcel A6 = "UKC" -putexcel A7 = "UKD" -putexcel A8 = "UKE" -putexcel A9 = "UKF" -putexcel A10 = "UKG" -putexcel A11 = "UKH" -putexcel A12 = "UKJ" -putexcel A13 = "UKK" -putexcel A14 = "UKL" -putexcel A15 = "UKM" -putexcel A16 = "UKN" -putexcel A17 = "Ydses_c5_Q2_L1" -putexcel A18 = "Ydses_c5_Q3_L1" -putexcel A19 = "Ydses_c5_Q4_L1" -putexcel A20 = "Ydses_c5_Q5_L1" -putexcel A21 = "Dlltsd01_L1" -putexcel A22 = "Dhe_mcs_L1" -putexcel A23 = "Dag_L1" -putexcel A24 = "Dag_sq_L1" -putexcel A25 = "Deh_c3_Medium" -putexcel A26 = "Deh_c3_Low" -putexcel A27 = "EthnicityAsian" -putexcel A28 = "EthnicityBlack" -putexcel A29 = "EthnicityOther" -putexcel A30 = "Dgn" -putexcel A31 = "Year_transformed" -putexcel A32 = "Constant" - -putexcel B1 = "COEFFICIENT" -putexcel C1 = "D_Home_owner_L1" -putexcel D1 = "Dcpst_Single_L1" -putexcel E1 = "Dnc_L1" -putexcel F1 = "Dhe_pcs_L1" -putexcel G1 = "UKC" -putexcel H1 = "UKD" -putexcel I1 = "UKE" -putexcel J1 = "UKF" -putexcel K1 = "UKG" -putexcel L1 = "UKH" -putexcel M1 = "UKJ" -putexcel N1 = "UKK" -putexcel O1 = "UKL" -putexcel P1 = "UKM" -putexcel Q1 = "UKN" -putexcel R1 = "Ydses_c5_Q2_L1" -putexcel S1 = "Ydses_c5_Q3_L1" -putexcel T1 = "Ydses_c5_Q4_L1" -putexcel U1 = "Ydses_c5_Q5_L1" -putexcel V1 = "Dlltsd01_L1" -putexcel W1 = "Dhe_mcs_L1" -putexcel X1 = "Dag_L1" -putexcel Y1 = "Dag_sq_L1" -putexcel Z1 = "Deh_c3_Medium" 
-putexcel AA1 = "Deh_c3_Low" -putexcel AB1 = "EthnicityAsian" -putexcel AC1 = "EthnicityBlack" -putexcel AD1 = "EthnicityOther" -putexcel AE1 = "Dgn" -putexcel AF1 = "Year_transformed" -putexcel AG1 = "Constant" - -* save RMSE -putexcel set "$dir_results/reg_RMSE.xlsx", sheet("UK") modify -putexcel A18 = ("DHE_MCS1") B18 = rmse - - -drop in_sample p -scalar drop r2_p N chi2 ll +* Load data +use "${estimation_sample}", clear +* Set data +xtset idperson swv +sort idperson swv -*************************************************************** -* DHE_MCS2_Females: SF12 MCS score 0-100 - causal employment effects * -*************************************************************** +* Adjust variables +do "${dir_do}/variable_update.do" +/* DP: Household income/poverty/employment transition variables are moved to variable_update.do */ +* Remove children +drop if dag < 16 -*Stage 2 -*Female -reghdfe dhe_mcs /// -ib11.exp_emp i.exp_poverty i.exp_incchange D.log_income financial_distress /// -y2020 y2021 /// -L.i.dhh_owned L.i.dcpst L.dnc L.dhe_pcs L.ib8.drgn L.i.ydses_c5 L.dlltsd01 L.dhe_mcs /// -L.dag L.dagsq i.deh_c3 stm /// -if dag>=25 & dag<=64 & dgn==0 /// -[pweight=${weight}] /// -, absorb(idperson) vce(cluster idperson) +/********************************** ESTIMATION ********************************/ - * save raw results -matrix results = r(table) -matrix results = results[1..6,1..10]' -putexcel set "$dir_raw_results/health_wellbeing/health_wellbeing", sheet("DHE_MCS2_Females") replace -putexcel A3 = matrix(results), names nformat(number_d2) -putexcel J4 = matrix(e(V)) +* Run Stata programs to produce Excel file +do "${dir_do}/programs.do" -gen in_sample = e(sample) -predict p +******************************************************************************** +* DHE_MCS1 - SF12 MCS score 0-100 of all working-age adults - baseline effects * +******************************************************************************** -save "$dir_validation_data/DHE_MCS2_Females_sample", 
replace +reg dhe_mcs /// + L.i.dhh_owned L.i.dcpst L.dnc L.dhe_pcs L.ib8.drgn L.i.ydses_c5 /// + L.dlltsd01 L.dhe_mcs L.dag L.dagsq i.deh_c3 i.dot i.dgn stm /// + [pw=${weight}], vce(cluster idperson) +process_regression, domain("health_wellbeing") process("DHE_MCS1") /// + sheet("DHE_MCS1") title("Process DHE_MCS1: Well-being") /// + gofrow(3) goflabel("DHE_MCS1 ") /// + ifcond("${DHE_MCS1_if_condition}") -scalar r2_p = e(r2_p) -scalar N = e(N) +* Save RMSE scalar rmse = e(rmse) -scalar chi2 = e(chi2) -scalar ll = e(ll) - - -* Results - -* Note: Zeros values are eliminated +putexcel set "$dir_results/reg_RMSE.xlsx", sheet("UK") modify +putexcel A18 = ("DHE_MCS1") B18 = rmse +scalar drop rmse -matrix b = e(b) -matrix V = e(V) -matrix V = V[1..14,1..14] -forvalues i = 1/14 { - forvalues j = 1/14 { - if `i' == `j' { - continue - } - matrix V[`i',`j'] = 0 - } -} - -* Store variance-covariance matrix - -preserve +********************************************************************** +* DHE_MCS2_Females: SF12 MCS score 0-100 - causal employment effects * +********************************************************************** -putexcel set "$dir_raw_results/health_wellbeing/var_cov", sheet("var_cov") replace -putexcel A1 = matrix(V) +* Stage 2 - Female -import excel "$dir_raw_results/health_wellbeing/var_cov", sheet("var_cov") clear +* List of variables to be reported in excel +local vars_for_excel "ib11.exp_emp i.exp_poverty i.exp_incchange D.log_income financial_distress y2020 y2021" +* Number of estimates to be reported in excel +local n_vars_for_excel "11" -describe -local no_vars = `r(k)' - -forvalues i = 1/2 { - egen row_sum = rowtotal(*) - drop if row_sum == 0 - drop row_sum - xpose, clear -} - -mkmat v*, matrix(var) -putexcel set "$dir_results/reg_health_wellbeing", sheet("DHE_MCS2_Females", replace) modify -putexcel C2 = matrix(var) - -restore - - -* Store estimated coefficients - -// Initialize a counter for non-zero coefficients -local non_zero_count = 0 
-//local names : colnames b - -// Loop through each element in `b` to count non-zero coefficients -forvalues i = 1/`no_vars' { - if (b[1, `i'] != 0) { - local non_zero_count = `non_zero_count' + 1 - } -} - -// Create a new row vector to hold only non-zero coefficients -matrix nonzero_b = J(1, `non_zero_count', .) - -// Populate nonzero_b with non-zero coefficients from b -local index = 1 -forvalues i = 1/`no_vars' { - if (b[1, `i'] != 0) { - matrix nonzero_b[1, `index'] = b[1, `i'] - local index = `index' + 1 - } -} - -putexcel set "$dir_results/reg_health_wellbeing", sheet("DHE_MCS2_Females") modify -putexcel A1 = matrix(nonzero_b'), names nformat(number_d2) - -* Labelling - -putexcel A1 = "REGRESSOR" -putexcel A2 = "EmployedToUnemployed" -putexcel A3 = "UnemployedToEmployed" -putexcel A4 = "PersistentUnemployed" -putexcel A5 = "NonPovertyToPoverty" -putexcel A6 = "PovertyToNonPoverty" -putexcel A7 = "PersistentPoverty" -putexcel A8 = "RealIncomeChange" -putexcel A9 = "RealIncomeDecrease_D" -putexcel A10 = "FinancialDistress" -putexcel A11 = "Covid_2020_D" -putexcel A12 = "Covid_2021_D" - - -putexcel B1 = "COEFFICIENT" -putexcel C1 = "EmployedToUnemployed" -putexcel D1 = "UnemployedToEmployed" -putexcel E1 = "PersistentUnemployed" -putexcel F1 = "NonPovertyToPoverty" -putexcel G1 = "PovertyToNonPoverty" -putexcel H1 = "PersistentPoverty" -putexcel I1 = "RealIncomeChange" -putexcel J1 = "RealIncomeDecrease_D" -putexcel K1 = "FinancialDistress" -putexcel L1 = "Covid_2020_D" -putexcel M1 = "Covid_2021_D" - -* save RMSE +reghdfe dhe_mcs /// + `vars_for_excel' /// + L.i.dhh_owned L.i.dcpst L.dnc L.dhe_pcs L.ib8.drgn L.i.ydses_c5 /// + L.dlltsd01 L.dhe_mcs L.dag L.dagsq i.deh_c3 stm /// + if ${DHE_MCS2_Females_if_condition} [pw=${weight}] /// + , absorb(idperson) vce(cluster idperson) + +process_regression, domain("health_wellbeing") process("DHE_MCS2_Females") /// + sheet("DHE_MCS2_Females") /// + title("Process DHE_MCS2_Females: Well-being health score") /// + 
gofrow(7) goflabel("DHE_MCS2_Females ") /// + ifcond("${DHE_MCS2_Females_if_condition}") /// + gformula maxestimates(`n_vars_for_excel') + +* Save RMSE +scalar rmse = e(rmse) putexcel set "$dir_results/reg_RMSE.xlsx", sheet("UK") modify putexcel A19 = ("DHE_MCS2_Females") B19 = rmse +scalar drop rmse + - -drop in_sample p -scalar drop r2_p N chi2 ll - -*************************************************************** +******************************************************************** * DHE_MCS2_Males: SF12 MCS score 0-100 - causal employment effects * -*************************************************************** - - -*Stage 2 -*Male -reghdfe dhe_mcs /// -ib11.exp_emp i.exp_poverty i.exp_incchange D.log_income financial_distress /// -y2020 y2021 /// -L.i.dhh_owned L.i.dcpst L.dnc L.dhe_pcs L.ib8.drgn L.i.ydses_c5 L.dlltsd01 L.dhe_mcs /// -L.dag L.dagsq i.deh_c3 stm /// -if dag>=25 & dag<=64 & dgn==1 /// -[pweight=${weight}] /// -, absorb(idperson) vce(cluster idperson) - +******************************************************************** - * save raw results -matrix results = r(table) -matrix results = results[1..6,1..10]' -putexcel set "$dir_raw_results/health_wellbeing/health_wellbeing", sheet("DHE_MCS2_Males") replace -putexcel A3 = matrix(results), names nformat(number_d2) -putexcel J4 = matrix(e(V)) +* Stage 2 - Male -gen in_sample = e(sample) +* List of variables to be reported in excel +local vars_for_excel "ib11.exp_emp i.exp_poverty i.exp_incchange D.log_income financial_distress y2020 y2021" +* Number of estimates to be reported in excel +local n_vars_for_excel "11" -predict p -save "$dir_validation_data/DHE_MCS2_Males_sample", replace - - -scalar r2_p = e(r2_p) -scalar N = e(N) +reghdfe dhe_mcs /// + `vars_for_excel' /// + L.i.dhh_owned L.i.dcpst L.dnc L.dhe_pcs L.ib8.drgn L.i.ydses_c5 /// + L.dlltsd01 L.dhe_mcs L.dag L.dagsq i.deh_c3 stm /// + if ${DHE_MCS2_Males_if_condition} [pw=${weight}] /// + , absorb(idperson) vce(cluster idperson) + 
+process_regression, domain("health_wellbeing") process("DHE_MCS2_Males") /// + sheet("DHE_MCS2_Males") /// + title("Process DHE_MCS2_Males: Well-being health score") /// + gofrow(11) goflabel("DHE_MCS2_Males ") /// + ifcond("${DHE_MCS2_Males_if_condition}") /// + gformula maxestimates(`n_vars_for_excel') + +* Save RMSE scalar rmse = e(rmse) -scalar chi2 = e(chi2) -scalar ll = e(ll) - - -* Results - -* Note: Zeros values are eliminated - -matrix b = e(b) -matrix V = e(V) -matrix V = V[1..14,1..14] - -forvalues i = 1/14 { - forvalues j = 1/14 { - if `i' == `j' { - continue - } - matrix V[`i',`j'] = 0 - } -} - -* Store variance-covariance matrix - -preserve - -putexcel set "$dir_raw_results/health_wellbeing/var_cov", sheet("var_cov") replace -putexcel A1 = matrix(V) - -import excel "$dir_raw_results/health_wellbeing/var_cov", sheet("var_cov") clear - -describe -local no_vars = `r(k)' - -forvalues i = 1/2 { - egen row_sum = rowtotal(*) - drop if row_sum == 0 - drop row_sum - xpose, clear -} - -mkmat v*, matrix(var) -putexcel set "$dir_results/reg_health_wellbeing", sheet("DHE_MCS2_Males", replace) modify -putexcel C2 = matrix(var) - -restore - - -* Store estimated coefficients - -// Initialize a counter for non-zero coefficients -local non_zero_count = 0 -//local names : colnames b - -// Loop through each element in `b` to count non-zero coefficients -forvalues i = 1/`no_vars' { - if (b[1, `i'] != 0) { - local non_zero_count = `non_zero_count' + 1 - } -} - -// Create a new row vector to hold only non-zero coefficients -matrix nonzero_b = J(1, `non_zero_count', .) 
- -// Populate nonzero_b with non-zero coefficients from b -local index = 1 -forvalues i = 1/`no_vars' { - if (b[1, `i'] != 0) { - matrix nonzero_b[1, `index'] = b[1, `i'] - local index = `index' + 1 - } -} - -putexcel set "$dir_results/reg_health_wellbeing", sheet("DHE_MCS2_Males") modify -putexcel A1 = matrix(nonzero_b'), names nformat(number_d2) - -* Labelling - -putexcel A1 = "REGRESSOR" -putexcel A2 = "EmployedToUnemployed" -putexcel A3 = "UnemployedToEmployed" -putexcel A4 = "PersistentUnemployed" -putexcel A5 = "NonPovertyToPoverty" -putexcel A6 = "PovertyToNonPoverty" -putexcel A7 = "PersistentPoverty" -putexcel A8 = "RealIncomeChange" -putexcel A9 = "RealIncomeDecrease_D" -putexcel A10 = "FinancialDistress" -putexcel A11 = "Covid_2020_D" -putexcel A12 = "Covid_2021_D" - - -putexcel B1 = "COEFFICIENT" -putexcel C1 = "EmployedToUnemployed" -putexcel D1 = "UnemployedToEmployed" -putexcel E1 = "PersistentUnemployed" -putexcel F1 = "NonPovertyToPoverty" -putexcel G1 = "PovertyToNonPoverty" -putexcel H1 = "PersistentPoverty" -putexcel I1 = "RealIncomeChange" -putexcel J1 = "RealIncomeDecrease_D" -putexcel K1 = "FinancialDistress" -putexcel L1 = "Covid_2020_D" -putexcel M1 = "Covid_2021_D" - -* save RMSE putexcel set "$dir_results/reg_RMSE.xlsx", sheet("UK") modify putexcel A20 = ("DHE_MCS2_Males") B20 = rmse +scalar drop rmse + - -drop in_sample p -scalar drop r2_p N chi2 ll - - -******************************************************************************* +******************************************************************************** * DHE_PCS1 - SF12 PCS score 0-100 of all working-age adults - baseline effects * ******************************************************************************** reg dhe_pcs /// -L.i.dhh_owned L.i.dcpst L.dnc L.dhe_mcs L.ib8.drgn L.i.ydses_c5 L.dlltsd01 L.dhe_pcs /// -L.dag L.dagsq i.deh_c3 i.dot i.dgn stm /// -[pweight=${weight}] /// -, vce(cluster idperson) + L.i.dhh_owned L.i.dcpst L.dnc L.dhe_mcs L.ib8.drgn L.i.ydses_c5 /// 
+ L.dlltsd01 L.dhe_pcs L.dag L.dagsq i.deh_c3 i.dot i.dgn stm /// + [pw=${weight}], vce(cluster idperson) - * save raw results -matrix results = r(table) -matrix results = results[1..6,1...]' -putexcel set "$dir_raw_results/health_wellbeing/health_wellbeing", sheet("DHE_PCS1") replace -putexcel A3 = matrix(results), names nformat(number_d2) -putexcel J4 = matrix(e(V)) +process_regression, domain("health_wellbeing") process("DHE_PCS1") /// + sheet("DHE_PCS1") /// + title("Process DHE_PCS1: Well-being health score") /// + gofrow(15) goflabel("DHE_PCS1 ") /// + ifcond("${DHE_PCS1_if_condition}") -gen in_sample = e(sample) - -predict p - -save "$dir_validation_data/DHE_PCS1_sample", replace - - -scalar r2_p = e(r2_p) -scalar N = e(N) -scalar rmse = e(rmse) -scalar chi2 = e(chi2) -scalar ll = e(ll) - - -* Results - -* Note: Zeros values are eliminated - -matrix b = e(b) -matrix V = e(V) - - -* Store variance-covariance matrix - -preserve - -putexcel set "$dir_raw_results/health_wellbeing/var_cov", sheet("var_cov") replace -putexcel A1 = matrix(V) - -import excel "$dir_raw_results/health_wellbeing/var_cov", sheet("var_cov") clear - -describe -local no_vars = `r(k)' - -forvalues i = 1/2 { - egen row_sum = rowtotal(*) - drop if row_sum == 0 - drop row_sum - xpose, clear -} - -mkmat v*, matrix(var) -putexcel set "$dir_results/reg_health_wellbeing", sheet("DHE_PCS1", replace) modify -putexcel C2 = matrix(var) - -restore - - -* Store estimated coefficients - -// Initialize a counter for non-zero coefficients -local non_zero_count = 0 -//local names : colnames b - -// Loop through each element in `b` to count non-zero coefficients -forvalues i = 1/`no_vars' { - if (b[1, `i'] != 0) { - local non_zero_count = `non_zero_count' + 1 - } -} - -// Create a new row vector to hold only non-zero coefficients -matrix nonzero_b = J(1, `non_zero_count', .) 
- -// Populate nonzero_b with non-zero coefficients from b -local index = 1 -forvalues i = 1/`no_vars' { - if (b[1, `i'] != 0) { - matrix nonzero_b[1, `index'] = b[1, `i'] - local index = `index' + 1 - } -} - -putexcel set "$dir_results/reg_health_wellbeing", sheet("DHE_PCS1") modify -putexcel A1 = matrix(nonzero_b'), names nformat(number_d2) - - -* Labelling - -putexcel A1 = "REGRESSOR" -putexcel A2 = "D_Home_owner_L1" -putexcel A3 = "Dcpst_Single_L1" -putexcel A4 = "Dnc_L1" -putexcel A5 = "Dhe_mcs_L1" -putexcel A6 = "UKC" -putexcel A7 = "UKD" -putexcel A8 = "UKE" -putexcel A9 = "UKF" -putexcel A10 = "UKG" -putexcel A11 = "UKH" -putexcel A12 = "UKJ" -putexcel A13 = "UKK" -putexcel A14 = "UKL" -putexcel A15 = "UKM" -putexcel A16 = "UKN" -putexcel A17 = "Ydses_c5_Q2_L1" -putexcel A18 = "Ydses_c5_Q3_L1" -putexcel A19 = "Ydses_c5_Q4_L1" -putexcel A20 = "Ydses_c5_Q5_L1" -putexcel A21 = "Dlltsd01_L1" -putexcel A22 = "Dhe_pcs_L1" -putexcel A23 = "Dag_L1" -putexcel A24 = "Dag_sq_L1" -putexcel A25 = "Deh_c3_Medium" -putexcel A26 = "Deh_c3_Low" -putexcel A27 = "EthnicityAsian" -putexcel A28 = "EthnicityBlack" -putexcel A29 = "EthnicityOther" -putexcel A30 = "Dgn" -putexcel A31 = "Year_transformed" -putexcel A32 = "Constant" - -putexcel B1 = "COEFFICIENT" -putexcel C1 = "D_Home_owner_L1" -putexcel D1 = "Dcpst_Single_L1" -putexcel E1 = "Dnc_L1" -putexcel F1 = "Dhe_mcs_L1" -putexcel G1 = "UKC" -putexcel H1 = "UKD" -putexcel I1 = "UKE" -putexcel J1 = "UKF" -putexcel K1 = "UKG" -putexcel L1 = "UKH" -putexcel M1 = "UKJ" -putexcel N1 = "UKK" -putexcel O1 = "UKL" -putexcel P1 = "UKM" -putexcel Q1 = "UKN" -putexcel R1 = "Ydses_c5_Q2_L1" -putexcel S1 = "Ydses_c5_Q3_L1" -putexcel T1 = "Ydses_c5_Q4_L1" -putexcel U1 = "Ydses_c5_Q5_L1" -putexcel V1 = "Dlltsd01_L1" -putexcel W1 = "Dhe_pcs_L1" -putexcel X1 = "Dag_L1" -putexcel Y1 = "Dag_sq_L1" -putexcel Z1 = "Deh_c3_Medium" -putexcel AA1 = "Deh_c3_Low" -putexcel AB1 = "EthnicityAsian" -putexcel AC1 = "EthnicityBlack" -putexcel AD1 = 
"EthnicityOther" -putexcel AE1 = "Dgn" -putexcel AF1 = "Year_transformed" -putexcel AG1 = "Constant" - -* save RMSE +* Save RMSE +scalar rmse = e(rmse) putexcel set "$dir_results/reg_RMSE.xlsx", sheet("UK") modify -putexcel A21 = ("DHE_PCS1") B21 = rmse - +putexcel A21 = ("DHE_PCS1") B21 = rmse +scalar drop rmse -drop in_sample p -scalar drop r2_p N chi2 ll - -*************************************************************** +********************************************************************** * DHE_PCS2_Females: SF12 PCS score 0-100 - causal employment effects * -*************************************************************** - +********************************************************************** -*Stage 2 -*Female -reghdfe dhe_pcs /// -ib11.exp_emp i.exp_poverty i.exp_incchange D.log_income financial_distress /// -y2020 y2021 /// -L.i.dhh_owned L.i.dcpst L.dnc L.dhe_mcs L.ib8.drgn L.i.ydses_c5 L.dlltsd01 L.dhe_pcs /// -L.dag L.dagsq i.deh_c3 stm /// -if dag>=25 & dag<=64 & dgn==0 /// -[pweight=${weight}] /// -, absorb(idperson) vce(cluster idperson) - - - * save raw results -matrix results = r(table) -matrix results = results[1..6,1..10]' -putexcel set "$dir_raw_results/health_wellbeing/health_wellbeing", sheet("DHE_PCS2_Females") replace -putexcel A3 = matrix(results), names nformat(number_d2) -putexcel J4 = matrix(e(V)) - -gen in_sample = e(sample) +* Stage 2 - Female -predict p +* List of variables to be reported in excel +local vars_for_excel "ib11.exp_emp i.exp_poverty i.exp_incchange D.log_income financial_distress y2020 y2021" +* Number of estimates to be reported in excel +local n_vars_for_excel "11" -save "$dir_validation_data/DHE_PCS2_Females_sample", replace - - -scalar r2_p = e(r2_p) -scalar N = e(N) +reghdfe dhe_pcs /// + `vars_for_excel' /// + L.i.dhh_owned L.i.dcpst L.dnc L.dhe_mcs L.ib8.drgn L.i.ydses_c5 /// + L.dlltsd01 L.dhe_pcs L.dag L.dagsq i.deh_c3 stm /// + if ${DHE_PCS2_Females_if_condition} /// + [pw=${weight}] /// + , absorb(idperson) 
vce(cluster idperson) + +process_regression, domain("health_wellbeing") process("DHE_PCS2_Females") /// + sheet("DHE_PCS2_Females") /// + title("Process DHE_PCS2_Females: Well-being health score") /// + gofrow(19) goflabel("DHE_PCS2_Females ") /// + ifcond("${DHE_PCS2_Females_if_condition}") /// + gformula maxestimates(`n_vars_for_excel') + +* Save RMSE scalar rmse = e(rmse) -scalar chi2 = e(chi2) -scalar ll = e(ll) - - -* Results - -* Note: Zeros values are eliminated - -matrix b = e(b) -matrix V = e(V) -matrix V = V[1..14,1..14] - -forvalues i = 1/14 { - forvalues j = 1/14 { - if `i' == `j' { - continue - } - matrix V[`i',`j'] = 0 - } -} - -* Store variance-covariance matrix - -preserve - -putexcel set "$dir_raw_results/health_wellbeing/var_cov", sheet("var_cov") replace -putexcel A1 = matrix(V) - -import excel "$dir_raw_results/health_wellbeing/var_cov", sheet("var_cov") clear - -describe -local no_vars = `r(k)' - -forvalues i = 1/2 { - egen row_sum = rowtotal(*) - drop if row_sum == 0 - drop row_sum - xpose, clear -} - -mkmat v*, matrix(var) -putexcel set "$dir_results/reg_health_wellbeing", sheet("DHE_PCS2_Females", replace) modify -putexcel C2 = matrix(var) - -restore - - -* Store estimated coefficients - -// Initialize a counter for non-zero coefficients -local non_zero_count = 0 -//local names : colnames b - -// Loop through each element in `b` to count non-zero coefficients -forvalues i = 1/`no_vars' { - if (b[1, `i'] != 0) { - local non_zero_count = `non_zero_count' + 1 - } -} - -// Create a new row vector to hold only non-zero coefficients -matrix nonzero_b = J(1, `non_zero_count', .) 
- -// Populate nonzero_b with non-zero coefficients from b -local index = 1 -forvalues i = 1/`no_vars' { - if (b[1, `i'] != 0) { - matrix nonzero_b[1, `index'] = b[1, `i'] - local index = `index' + 1 - } -} - -putexcel set "$dir_results/reg_health_wellbeing", sheet("DHE_PCS2_Females") modify -putexcel A1 = matrix(nonzero_b'), names nformat(number_d2) - -* Labelling - -putexcel A1 = "REGRESSOR" -putexcel A2 = "EmployedToUnemployed" -putexcel A3 = "UnemployedToEmployed" -putexcel A4 = "PersistentUnemployed" -putexcel A5 = "NonPovertyToPoverty" -putexcel A6 = "PovertyToNonPoverty" -putexcel A7 = "PersistentPoverty" -putexcel A8 = "RealIncomeChange" -putexcel A9 = "RealIncomeDecrease_D" -putexcel A10 = "FinancialDistress" -putexcel A11 = "Covid_2020_D" -putexcel A12 = "Covid_2021_D" - - -putexcel B1 = "COEFFICIENT" -putexcel C1 = "EmployedToUnemployed" -putexcel D1 = "UnemployedToEmployed" -putexcel E1 = "PersistentUnemployed" -putexcel F1 = "NonPovertyToPoverty" -putexcel G1 = "PovertyToNonPoverty" -putexcel H1 = "PersistentPoverty" -putexcel I1 = "RealIncomeChange" -putexcel J1 = "RealIncomeDecrease_D" -putexcel K1 = "FinancialDistress" -putexcel L1 = "Covid_2020_D" -putexcel M1 = "Covid_2021_D" - -* save RMSE putexcel set "$dir_results/reg_RMSE.xlsx", sheet("UK") modify putexcel A22 = ("DHE_PCS2_Females") B22 = rmse +scalar drop rmse + - - -drop in_sample p -scalar drop r2_p N chi2 ll - -*************************************************************** +******************************************************************** * DHE_PCS2_Males: SF12 PCS score 0-100 - causal employment effects * -*************************************************************** - - -*Stage 2 -*Male -reghdfe dhe_pcs /// -ib11.exp_emp i.exp_poverty i.exp_incchange D.log_income financial_distress /// -y2020 y2021 /// -L.i.dhh_owned L.i.dcpst L.dnc L.dhe_mcs L.ib8.drgn L.i.ydses_c5 L.dlltsd01 L.dhe_pcs /// -L.dag L.dagsq i.deh_c3 stm /// -if dag>=25 & dag<=64 & dgn==1 /// -[pweight=${weight}] /// 
-, absorb(idperson) vce(cluster idperson) - - - * save raw results -matrix results = r(table) -matrix results = results[1..6,1..10]' -putexcel set "$dir_raw_results/health_wellbeing/health_wellbeing", sheet("DHE_PCS2_Males") replace -putexcel A3 = matrix(results), names nformat(number_d2) -putexcel J4 = matrix(e(V)) +******************************************************************** -gen in_sample = e(sample) +* Stage 2 - Male -predict p +* List of variables to be reported in excel +local vars_for_excel "ib11.exp_emp i.exp_poverty i.exp_incchange D.log_income financial_distress y2020 y2021" +* Number of estimates to be reported in excel +local n_vars_for_excel "11" -save "$dir_validation_data/DHE_PCS2_Males_sample", replace - - -scalar r2_p = e(r2_p) -scalar N = e(N) +reghdfe dhe_pcs /// + `vars_for_excel' /// + L.i.dhh_owned L.i.dcpst L.dnc L.dhe_mcs L.ib8.drgn L.i.ydses_c5 /// + L.dlltsd01 L.dhe_pcs L.dag L.dagsq i.deh_c3 stm /// + if ${DHE_PCS2_Males_if_condition} [pw=${weight}] /// + , absorb(idperson) vce(cluster idperson) + +process_regression, domain("health_wellbeing") process("DHE_PCS2_Males") /// + sheet("DHE_PCS2_Males") /// + title("Process DHE_PCS2_Males: Well-being health score") /// + gofrow(23) goflabel("DHE_PCS2_Males ") /// + ifcond("${DHE_PCS2_Males_if_condition}") /// + gformula maxestimates(`n_vars_for_excel') + +* Save RMSE scalar rmse = e(rmse) -scalar chi2 = e(chi2) -scalar ll = e(ll) - - -* Results - -* Note: Zeros values are eliminated - -matrix b = e(b) -matrix V = e(V) -matrix V = V[1..14,1..14] - -forvalues i = 1/14 { - forvalues j = 1/14 { - if `i' == `j' { - continue - } - matrix V[`i',`j'] = 0 - } -} - -* Store variance-covariance matrix - -preserve - -putexcel set "$dir_raw_results/health_wellbeing/var_cov", sheet("var_cov") replace -putexcel A1 = matrix(V) - -import excel "$dir_raw_results/health_wellbeing/var_cov", sheet("var_cov") clear - -describe -local no_vars = `r(k)' - -forvalues i = 1/2 { - egen row_sum = rowtotal(*) - 
drop if row_sum == 0 - drop row_sum - xpose, clear -} - -mkmat v*, matrix(var) -putexcel set "$dir_results/reg_health_wellbeing", sheet("DHE_PCS2_Males", replace) modify -putexcel C2 = matrix(var) - -restore - - -* Store estimated coefficients - -// Initialize a counter for non-zero coefficients -local non_zero_count = 0 -//local names : colnames b - -// Loop through each element in `b` to count non-zero coefficients -forvalues i = 1/`no_vars' { - if (b[1, `i'] != 0) { - local non_zero_count = `non_zero_count' + 1 - } -} - -// Create a new row vector to hold only non-zero coefficients -matrix nonzero_b = J(1, `non_zero_count', .) - -// Populate nonzero_b with non-zero coefficients from b -local index = 1 -forvalues i = 1/`no_vars' { - if (b[1, `i'] != 0) { - matrix nonzero_b[1, `index'] = b[1, `i'] - local index = `index' + 1 - } -} - -putexcel set "$dir_results/reg_health_wellbeing", sheet("DHE_PCS2_Males") modify -putexcel A1 = matrix(nonzero_b'), names nformat(number_d2) - -* Labelling - -putexcel A1 = "REGRESSOR" -putexcel A2 = "EmployedToUnemployed" -putexcel A3 = "UnemployedToEmployed" -putexcel A4 = "PersistentUnemployed" -putexcel A5 = "NonPovertyToPoverty" -putexcel A6 = "PovertyToNonPoverty" -putexcel A7 = "PersistentPoverty" -putexcel A8 = "RealIncomeChange" -putexcel A9 = "RealIncomeDecrease_D" -putexcel A10 = "FinancialDistress" -putexcel A11 = "Covid_2020_D" -putexcel A12 = "Covid_2021_D" - - -putexcel B1 = "COEFFICIENT" -putexcel C1 = "EmployedToUnemployed" -putexcel D1 = "UnemployedToEmployed" -putexcel E1 = "PersistentUnemployed" -putexcel F1 = "NonPovertyToPoverty" -putexcel G1 = "PovertyToNonPoverty" -putexcel H1 = "PersistentPoverty" -putexcel I1 = "RealIncomeChange" -putexcel J1 = "RealIncomeDecrease_D" -putexcel K1 = "FinancialDistress" -putexcel L1 = "Covid_2020_D" -putexcel M1 = "Covid_2021_D" - -* save RMSE putexcel set "$dir_results/reg_RMSE.xlsx", sheet("UK") modify putexcel A23 = ("DHE_PCS2_Males") B23 = rmse +scalar drop rmse + - -drop 
in_sample p -scalar drop r2_p N chi2 ll - - -******************************************************************************* +***************************************************************************** * DLS1 - Life Satisfaction 1-7 of all working-age adults - baseline effects * -******************************************************************************** +***************************************************************************** reg dls /// -L.i.dhh_owned L.i.dcpst L.dnc L.dhe_pcs L.ib8.drgn L.i.ydses_c5 L.dlltsd01 L.dls /// -L.dag L.dagsq i.deh_c3 i.dot i.dgn stm /// -[pweight=${weight}] /// -, vce(cluster idperson) - - * save raw results -matrix results = r(table) -matrix results = results[1..6,1...]' -putexcel set "$dir_raw_results/health_wellbeing/health_wellbeing", sheet("DLS1") replace -putexcel A3 = matrix(results), names nformat(number_d2) -putexcel J4 = matrix(e(V)) - -gen in_sample = e(sample) - -predict p - -save "$dir_validation_data/DLS1_sample", replace - - -scalar r2_p = e(r2_p) -scalar N = e(N) -scalar rmse = e(rmse) -scalar chi2 = e(chi2) -scalar ll = e(ll) - - -* Results - -* Note: Zeros values are eliminated + L.i.dhh_owned L.i.dcpst L.dnc L.dhe_pcs L.ib8.drgn L.i.ydses_c5 /// + L.dlltsd01 L.dls L.dag L.dagsq i.deh_c3 i.dot i.dgn stm /// + [pw=${weight}], vce(cluster idperson) -matrix b = e(b) -matrix V = e(V) - - -* Store variance-covariance matrix - -preserve - -putexcel set "$dir_raw_results/health_wellbeing/var_cov", sheet("var_cov") replace -putexcel A1 = matrix(V) - -import excel "$dir_raw_results/health_wellbeing/var_cov", sheet("var_cov") clear +process_regression, domain("health_wellbeing") process("DLS1") sheet("DLS1") /// + title("Process DLS1: Well-being health score") /// + gofrow(27) goflabel("DLS1 ") /// + ifcond("${DLS1_if_condition}") -describe -local no_vars = `r(k)' - -forvalues i = 1/2 { - egen row_sum = rowtotal(*) - drop if row_sum == 0 - drop row_sum - xpose, clear -} - -mkmat v*, matrix(var) -putexcel set 
"$dir_results/reg_health_wellbeing", sheet("DLS1", replace) modify -putexcel C2 = matrix(var) - -restore - - -* Store estimated coefficients - -// Initialize a counter for non-zero coefficients -local non_zero_count = 0 -//local names : colnames b - -// Loop through each element in `b` to count non-zero coefficients -forvalues i = 1/`no_vars' { - if (b[1, `i'] != 0) { - local non_zero_count = `non_zero_count' + 1 - } -} - -// Create a new row vector to hold only non-zero coefficients -matrix nonzero_b = J(1, `non_zero_count', .) - -// Populate nonzero_b with non-zero coefficients from b -local index = 1 -forvalues i = 1/`no_vars' { - if (b[1, `i'] != 0) { - matrix nonzero_b[1, `index'] = b[1, `i'] - local index = `index' + 1 - } -} - -putexcel set "$dir_results/reg_health_wellbeing", sheet("DLS1") modify -putexcel A1 = matrix(nonzero_b'), names nformat(number_d2) - - -* Labelling - -putexcel A1 = "REGRESSOR" -putexcel A2 = "D_Home_owner_L1" -putexcel A3 = "Dcpst_Single_L1" -putexcel A4 = "Dnc_L1" -putexcel A5 = "Dhe_pcs_L1" -putexcel A6 = "UKC" -putexcel A7 = "UKD" -putexcel A8 = "UKE" -putexcel A9 = "UKF" -putexcel A10 = "UKG" -putexcel A11 = "UKH" -putexcel A12 = "UKJ" -putexcel A13 = "UKK" -putexcel A14 = "UKL" -putexcel A15 = "UKM" -putexcel A16 = "UKN" -putexcel A17 = "Ydses_c5_Q2_L1" -putexcel A18 = "Ydses_c5_Q3_L1" -putexcel A19 = "Ydses_c5_Q4_L1" -putexcel A20 = "Ydses_c5_Q5_L1" -putexcel A21 = "Dlltsd01_L1" -putexcel A22 = "Dls_L1" -putexcel A23 = "Dag_L1" -putexcel A24 = "Dag_sq_L1" -putexcel A25 = "Deh_c3_Medium" -putexcel A26 = "Deh_c3_Low" -putexcel A27 = "EthnicityAsian" -putexcel A28 = "EthnicityBlack" -putexcel A29 = "EthnicityOther" -putexcel A30 = "Dgn" -putexcel A31 = "Year_transformed" -putexcel A32 = "Constant" - -putexcel B1 = "COEFFICIENT" -putexcel C1 = "D_Home_owner_L1" -putexcel D1 = "Dcpst_Single_L1" -putexcel E1 = "Dnc_L1" -putexcel F1 = "Dhe_pcs_L1" -putexcel G1 = "UKC" -putexcel H1 = "UKD" -putexcel I1 = "UKE" -putexcel J1 = "UKF" 
-putexcel K1 = "UKG" -putexcel L1 = "UKH" -putexcel M1 = "UKJ" -putexcel N1 = "UKK" -putexcel O1 = "UKL" -putexcel P1 = "UKM" -putexcel Q1 = "UKN" -putexcel R1 = "Ydses_c5_Q2_L1" -putexcel S1 = "Ydses_c5_Q3_L1" -putexcel T1 = "Ydses_c5_Q4_L1" -putexcel U1 = "Ydses_c5_Q5_L1" -putexcel V1 = "Dlltsd01_L1" -putexcel W1 = "Dls_L1" -putexcel X1 = "Dag_L1" -putexcel Y1 = "Dag_sq_L1" -putexcel Z1 = "Deh_c3_Medium" -putexcel AA1 = "Deh_c3_Low" -putexcel AB1 = "EthnicityAsian" -putexcel AC1 = "EthnicityBlack" -putexcel AD1 = "EthnicityOther" -putexcel AE1 = "Dgn" -putexcel AF1 = "Year_transformed" -putexcel AG1 = "Constant" - -* save RMSE +* Save RMSE +scalar rmse = e(rmse) putexcel set "$dir_results/reg_RMSE.xlsx", sheet("UK") modify -putexcel A24 = ("DLS1") B24 = rmse - +putexcel A24 = ("DLS1") B24 = rmse +scalar drop rmse -drop in_sample p -scalar drop r2_p N chi2 ll - - -*************************************************************** + +******************************************************************* * DLS2_Females: Life Satisfaction 1-7 - causal employment effects * -*************************************************************** - - -*Stage 2 -*Female -reghdfe dls /// -ib11.exp_emp i.exp_poverty i.exp_incchange D.log_income financial_distress /// -y2020 y2021 /// -L.i.dhh_owned L.i.dcpst L.dnc L.dhe_pcs L.ib8.drgn L.i.ydses_c5 L.dlltsd01 L.dls /// -L.dag L.dagsq i.deh_c3 stm /// -if dag>=25 & dag<=64 & dgn==0 /// -[pweight=${weight}] /// -, absorb(idperson) vce(cluster idperson) - - - * save raw results -matrix results = r(table) -matrix results = results[1..6,1..10]' -putexcel set "$dir_raw_results/health_wellbeing/health_wellbeing", sheet("DLS2_Females") replace -putexcel A3 = matrix(results), names nformat(number_d2) -putexcel J4 = matrix(e(V)) - -gen in_sample = e(sample) - -predict p +******************************************************************* -save "$dir_validation_data/DLS2_Females_sample", replace +* Stage 2 - Female +* List of variables to be 
reported in excel +local vars_for_excel "ib11.exp_emp i.exp_poverty i.exp_incchange D.log_income financial_distress y2020 y2021" +* Number of estimates to be reported in excel +local n_vars_for_excel "11" -scalar r2_p = e(r2_p) -scalar N = e(N) +reghdfe dls /// + `vars_for_excel' /// + L.i.dhh_owned L.i.dcpst L.dnc L.dhe_pcs L.ib8.drgn L.i.ydses_c5 /// + L.dlltsd01 L.dls L.dag L.dagsq i.deh_c3 stm /// + if ${DLS2_Females_if_condition} [pw=${weight}] /// + , absorb(idperson) vce(cluster idperson) + +process_regression, domain("health_wellbeing") process("DLS2_Females") /// + sheet("DLS2_Females") /// + title("Process DLS2_Females: Well-being health score") /// + gofrow(31) goflabel("DLS2_Females ") /// + ifcond("${DLS2_Females_if_condition}") /// + gformula maxestimates(`n_vars_for_excel') + +* Save RMSE scalar rmse = e(rmse) -scalar chi2 = e(chi2) -scalar ll = e(ll) - - -* Results - -* Note: Zeros values are eliminated - -matrix b = e(b) -matrix V = e(V) -matrix V = V[1..14,1..14] - -forvalues i = 1/14 { - forvalues j = 1/14 { - if `i' == `j' { - continue - } - matrix V[`i',`j'] = 0 - } -} - -* Store variance-covariance matrix - -preserve - -putexcel set "$dir_raw_results/health_wellbeing/var_cov", sheet("var_cov") replace -putexcel A1 = matrix(V) - -import excel "$dir_raw_results/health_wellbeing/var_cov", sheet("var_cov") clear - -describe -local no_vars = `r(k)' - -forvalues i = 1/2 { - egen row_sum = rowtotal(*) - drop if row_sum == 0 - drop row_sum - xpose, clear -} - -mkmat v*, matrix(var) -putexcel set "$dir_results/reg_health_wellbeing", sheet("DLS2_Females", replace) modify -putexcel C2 = matrix(var) - -restore - - -* Store estimated coefficients - -// Initialize a counter for non-zero coefficients -local non_zero_count = 0 -//local names : colnames b - -// Loop through each element in `b` to count non-zero coefficients -forvalues i = 1/`no_vars' { - if (b[1, `i'] != 0) { - local non_zero_count = `non_zero_count' + 1 - } -} - -// Create a new row vector to 
hold only non-zero coefficients -matrix nonzero_b = J(1, `non_zero_count', .) - -// Populate nonzero_b with non-zero coefficients from b -local index = 1 -forvalues i = 1/`no_vars' { - if (b[1, `i'] != 0) { - matrix nonzero_b[1, `index'] = b[1, `i'] - local index = `index' + 1 - } -} - -putexcel set "$dir_results/reg_health_wellbeing", sheet("DLS2_Females") modify -putexcel A1 = matrix(nonzero_b'), names nformat(number_d2) - -* Labelling - -putexcel A1 = "REGRESSOR" -putexcel A2 = "EmployedToUnemployed" -putexcel A3 = "UnemployedToEmployed" -putexcel A4 = "PersistentUnemployed" -putexcel A5 = "NonPovertyToPoverty" -putexcel A6 = "PovertyToNonPoverty" -putexcel A7 = "PersistentPoverty" -putexcel A8 = "RealIncomeChange" -putexcel A9 = "RealIncomeDecrease_D" -putexcel A10 = "FinancialDistress" -putexcel A11 = "Covid_2020_D" -putexcel A12 = "Covid_2021_D" - - -putexcel B1 = "COEFFICIENT" -putexcel C1 = "EmployedToUnemployed" -putexcel D1 = "UnemployedToEmployed" -putexcel E1 = "PersistentUnemployed" -putexcel F1 = "NonPovertyToPoverty" -putexcel G1 = "PovertyToNonPoverty" -putexcel H1 = "PersistentPoverty" -putexcel I1 = "RealIncomeChange" -putexcel J1 = "RealIncomeDecrease_D" -putexcel K1 = "FinancialDistress" -putexcel L1 = "Covid_2020_D" -putexcel M1 = "Covid_2021_D" - -* save RMSE putexcel set "$dir_results/reg_RMSE.xlsx", sheet("UK") modify putexcel A25 = ("DLS2_Females") B25 = rmse +scalar drop rmse + - -drop in_sample p -scalar drop r2_p N chi2 ll - -*************************************************************** +***************************************************************** * DLS2_Males: Life Satisfaction 1-7 - causal employment effects * -*************************************************************** - - -*Stage 2 -*Male -reghdfe dls /// -ib11.exp_emp i.exp_poverty i.exp_incchange D.log_income financial_distress /// -y2020 y2021 /// -L.i.dhh_owned L.i.dcpst L.dnc L.dhe_pcs L.ib8.drgn L.i.ydses_c5 L.dlltsd01 L.dls /// -L.dag L.dagsq i.deh_c3 stm /// -if 
dag>=25 & dag<=64 & dgn==1 /// -[pweight=${weight}] /// -, absorb(idperson) vce(cluster idperson) - - - * save raw results -matrix results = r(table) -matrix results = results[1..6,1..10]' -putexcel set "$dir_raw_results/health_wellbeing/health_wellbeing", sheet("DLS2_Males") replace -putexcel A3 = matrix(results), names nformat(number_d2) -putexcel J4 = matrix(e(V)) - -gen in_sample = e(sample) +***************************************************************** -predict p +* Stage 2 - Male -save "$dir_validation_data/DLS2_Males_sample", replace +* List of variables to be reported in excel +local vars_for_excel "ib11.exp_emp i.exp_poverty i.exp_incchange D.log_income financial_distress y2020 y2021" +* Number of estimates to be reported in excel +local n_vars_for_excel "11" - -scalar r2_p = e(r2_p) -scalar N = e(N) +reghdfe dls /// + `vars_for_excel' /// + L.i.dhh_owned L.i.dcpst L.dnc L.dhe_pcs L.ib8.drgn L.i.ydses_c5 /// + L.dlltsd01 L.dls L.dag L.dagsq i.deh_c3 stm /// + if ${DLS2_Males_if_condition} [pw=${weight}] /// + , absorb(idperson) vce(cluster idperson) + +process_regression, domain("health_wellbeing") process("DLS2_Males") /// + sheet("DLS2_Males") /// + title("Process DLS2_Males: Well-being health score") /// + gofrow(35) goflabel("DLS2_Males ") /// + ifcond("${DLS2_Males_if_condition}") /// + gformula maxestimates(`n_vars_for_excel') + +* Save RMSE scalar rmse = e(rmse) -scalar chi2 = e(chi2) -scalar ll = e(ll) - - -* Results - -* Note: Zeros values are eliminated - -matrix b = e(b) -matrix V = e(V) -matrix V = V[1..14,1..14] - -forvalues i = 1/14 { - forvalues j = 1/14 { - if `i' == `j' { - continue - } - matrix V[`i',`j'] = 0 - } -} - -* Store variance-covariance matrix - -preserve - -putexcel set "$dir_raw_results/health_wellbeing/var_cov", sheet("var_cov") replace -putexcel A1 = matrix(V) - -import excel "$dir_raw_results/health_wellbeing/var_cov", sheet("var_cov") clear - -describe -local no_vars = `r(k)' - -forvalues i = 1/2 { - egen row_sum = 
rowtotal(*) - drop if row_sum == 0 - drop row_sum - xpose, clear -} - -mkmat v*, matrix(var) -putexcel set "$dir_results/reg_health_wellbeing", sheet("DLS2_Males", replace) modify -putexcel C2 = matrix(var) - -restore - - -* Store estimated coefficients - -// Initialize a counter for non-zero coefficients -local non_zero_count = 0 -//local names : colnames b - -// Loop through each element in `b` to count non-zero coefficients -forvalues i = 1/`no_vars' { - if (b[1, `i'] != 0) { - local non_zero_count = `non_zero_count' + 1 - } -} - -// Create a new row vector to hold only non-zero coefficients -matrix nonzero_b = J(1, `non_zero_count', .) - -// Populate nonzero_b with non-zero coefficients from b -local index = 1 -forvalues i = 1/`no_vars' { - if (b[1, `i'] != 0) { - matrix nonzero_b[1, `index'] = b[1, `i'] - local index = `index' + 1 - } -} - -putexcel set "$dir_results/reg_health_wellbeing", sheet("DLS2_Males") modify -putexcel A1 = matrix(nonzero_b'), names nformat(number_d2) - -* Labelling - -putexcel A1 = "REGRESSOR" -putexcel A2 = "EmployedToUnemployed" -putexcel A3 = "UnemployedToEmployed" -putexcel A4 = "PersistentUnemployed" -putexcel A5 = "NonPovertyToPoverty" -putexcel A6 = "PovertyToNonPoverty" -putexcel A7 = "PersistentPoverty" -putexcel A8 = "RealIncomeChange" -putexcel A9 = "RealIncomeDecrease_D" -putexcel A10 = "FinancialDistress" -putexcel A11 = "Covid_2020_D" -putexcel A12 = "Covid_2021_D" - - -putexcel B1 = "COEFFICIENT" -putexcel C1 = "EmployedToUnemployed" -putexcel D1 = "UnemployedToEmployed" -putexcel E1 = "PersistentUnemployed" -putexcel F1 = "NonPovertyToPoverty" -putexcel G1 = "PovertyToNonPoverty" -putexcel H1 = "PersistentPoverty" -putexcel I1 = "RealIncomeChange" -putexcel J1 = "RealIncomeDecrease_D" -putexcel K1 = "FinancialDistress" -putexcel L1 = "Covid_2020_D" -putexcel M1 = "Covid_2021_D" - -* save RMSE putexcel set "$dir_results/reg_RMSE.xlsx", sheet("UK") modify putexcel A26 = ("DLS2_Males") B26 = rmse +scalar drop rmse - -drop 
in_sample p -scalar drop r2_p N chi2 ll +display "Well-being analysis complete!" + +capture log close + + \ No newline at end of file diff --git a/input/InitialPopulations/compile/RegressionEstimates/reg_home_ownership.do b/input/InitialPopulations/compile/RegressionEstimates/reg_home_ownership.do index 3a2dd6308..3462fe010 100644 --- a/input/InitialPopulations/compile/RegressionEstimates/reg_home_ownership.do +++ b/input/InitialPopulations/compile/RegressionEstimates/reg_home_ownership.do @@ -16,25 +16,25 @@ set type double //set maxvar 120000 set maxvar 30000 -******************************************************************* + +/********************************* SET LOG FILE *******************************/ cap log close log using "${dir_log}/reg_home_ownership.log", replace -******************************************************************* -* Set Excel file -* Info sheet +/********************************* SET EXCEL FILE *****************************/ putexcel set "$dir_results/reg_home_ownership", sheet("Info") replace -putexcel A1 = "Description:" +putexcel A1 = "Description:", bold putexcel B1 = "Model parameters governing projection of home ownership" -putexcel A2 = "Authors: Patryk Bronka, Justin van de Ven, Daria Popova, Aleksandra Kolndrekaj" +putexcel A2 = "Authors: " +putexcel B2 = "Patryk Bronka, Justin van de Ven, Daria Popova, Aleksandra Kolndrekaj" putexcel A3 = "Last edit: 18 Feb 2026 AK" -putexcel A4 = "Process:", bold -putexcel B4 = "Description:", bold -putexcel A5 = "HO1" -putexcel B5 = "Probit regression estimates of the probability of being a home owner, aged 18+" +putexcel A5 = "Process:", bold +putexcel B5 = "Description:", bold +putexcel A6 = "HO1" +putexcel B6 = "Prob. 
of being a home owner" putexcel A10 = "Notes:", bold putexcel B10 = "Estimation sample: UK_ipop.dta with grossing up weight dwt" @@ -57,9 +57,8 @@ sort idperson swv * Adjust variables do "${dir_do}/variable_update.do" -*-------------------------------------------------- -* Create sample at benefti unit head -*-------------------------------------------------- + +* Create sample at benefit unit head * Keep adults (18+) keep if dag >= 18 @@ -97,13 +96,20 @@ by idbenefitunit swv, sort: gen n=_N assert n==1 sort idperson swv + + /********************************** ESTIMATION ********************************/ +* Run Stata programs to produce Excel file +do "${dir_do}/programs.do" + + /********************** HO1: PROBABILITY OF OWNING HOME ***********************/ display "${ho1_if_condition}" probit dhh_owned i.Dgn Dag Dag_sq /// - il.Dhhtp_c8_2 il.Dhhtp_c8_3 il.Dhhtp_c8_4 il.Dhhtp_c8_5 il.Dhhtp_c8_6 il.Dhhtp_c8_7 il.Dhhtp_c8_8 /// + il.Dhhtp_c8_2 il.Dhhtp_c8_3 il.Dhhtp_c8_4 il.Dhhtp_c8_5 il.Dhhtp_c8_6 /// + il.Dhhtp_c8_7 il.Dhhtp_c8_8 /// il.Les_c4_Student il.Les_c4_NotEmployed il.Les_c4_Retired /// i.Deh_c4_Medium i.Deh_c4_Low i.Deh_c4_Na /// l.Dhe_mcs l.Dhe_pcs /// @@ -111,223 +117,17 @@ probit dhh_owned i.Dgn Dag Dag_sq /// l.Yptciihs_dv /// l.Dhh_owned /// $regions Year_transformed Y2020 Y2021 $ethnicity /// - if ${ho1_if_condition} [pw=dwt], vce(cluster idperson) - - * Save raw results -matrix results = r(table) -matrix results = results[1..6,1...]' - -putexcel set "$dir_raw_results/home_ownership/home_ownership", /// - sheet("Process HO1") replace -putexcel A3 = matrix(results), names nformat(number_d2) -putexcel J4 = matrix(e(V)) - -outreg2 stats(coef se pval) using /// - "$dir_raw_results/home_ownership/HO1.doc", replace /// -title("Process H01: Probability Own Home") /// - ctitle(Own home) label side dec(2) noparen /// - addstat(R2, e(r2_p), Chi2, e(chi2), Log-likelihood, e(ll)) /// - addnote(`"Note: Regression if condition = (${ho1_if_condition}). 
Only estimated on benefit unit heads."') - -* Save sample inclusion indicator and predicted probabilities -gen in_sample = e(sample) -predict p - -* Save sample for stimate validation -save "$dir_validation_data/HO1_sample", replace - -* Store model summary statistics -scalar r2_p = e(r2_p) -scalar N_sample = e(N) -scalar chi2 = e(chi2) -scalar ll = e(ll) - - -* Store results in Excel - -* Store estimates -matrix b = e(b) -matrix V = e(V) + if ${ho1_if_condition} [pw=${weight}], vce(cluster idperson) -mata: - // Call matrices into mata - V = st_matrix("V") - b = st_matrix("b") - - // Find which coefficients are nonzero - keep = (b :!= 0) +process_regression, domain("home_ownership") process("HO1") sheet("HO1") /// + title("Process HO1: Prob. own home") /// + gofrow(3) goflabel("HO1 - Own home") /// + ifcond("${ho1_if_condition}") probit - // Eliminate zeros - b_trimmed = select(b, keep) - V_trimmed = select(V, keep) - V_trimmed = select(V_trimmed', keep)' - - // Inspection - b_trimmed - V_trimmed - // Return to Stata - st_matrix("b_trimmed", b_trimmed') - st_matrix("V_trimmed", V_trimmed) - st_matrix("nonzero_b_flag", keep) -end - -* Eigenvalue tests for var-cov invertablility in SimPaths -matrix symeigen X lambda = V_trimmed - -scalar max_eig = lambda[1,1] - -scalar min_ratio = lambda[1, colsof(lambda)] / max_eig - -* Outcome of max eigenvalue test -if max_eig < 1.0e-12 { +display "Home ownership analysis complete!" - display as error "CRITICAL ERROR: Maximum eigenvalue is too small (`max_eig')." - display as error "The Variance-Covariance matrix is likely singular." - exit 999 - -} - -display "Stability Check Passed: Max Eigenvalue is " max_eig - -* Outcome of eigenvalue ratio test -if min_ratio < 1.0e-12 { - display as error "Matrix is ill-conditioned. Min/Max ratio: " min_ratio - exit 506 - -} - -display "Stability Check Passed. 
Min/Max ratio: " min_ratio - -* Export into Excel -putexcel set "$dir_results/reg_home_ownership", sheet("HO1") modify -putexcel B2 = matrix(b_trimmed) -putexcel C2 = matrix(V_trimmed) - - -* Labels -preserve -putexcel set "$dir_results/reg_home_ownership", sheet("HO1") modify - -putexcel A1 = "REGRESSOR" -putexcel B1 = "COEFFICIENT" - -* Use Mata to extract nice labels from colstripe of e(b) - -local dir_results "$dir_results" -cap erase "$dir_results/temp_labels.txt" - -mata: - // -------------------------------------------------- - // Import objects from Stata - // -------------------------------------------------- - nonzero_b_flag = st_matrix("nonzero_b_flag") - stripe = st_matrixcolstripe("e(b)") - - // Ensure column vector - nonzero_b_flag = nonzero_b_flag' - - // -------------------------------------------------- - // Extract variable names - // -------------------------------------------------- - varnames = stripe[.,2] - - // Keep non-baseline coefficients - varnames_no_bl = select(varnames, nonzero_b_flag :== 1) - - // -------------------------------------------------- - // Clean labels - // -------------------------------------------------- - labels_no_bl = usubinstr(varnames_no_bl, "1.", "", 1) - labels_no_bl = regexr(labels_no_bl, "^_cons", "Constant") - - // Handle lags: L.var -> var_L1 - labels_no_bl = /// - regexm(labels_no_bl, "^L\.") :* /// - (regexr(labels_no_bl, "^L\.", "") :+ "_L1") :+ /// - (!regexm(labels_no_bl, "^L\.") :* labels_no_bl) - - // Handle 1L.var - labels_no_bl = /// - regexm(labels_no_bl, "^1L\.") :* /// - (regexr(labels_no_bl, "^1L\.", "") :+ "_L1") :+ /// - (!regexm(labels_no_bl, "^1L\.") :* labels_no_bl) - - // -------------------------------------------------- - // Add header - // -------------------------------------------------- - labels_out = "v1" \ labels_no_bl - - // -------------------------------------------------- - // Write to temp file - // -------------------------------------------------- - outfile = 
st_local("dir_results") + "/temp_labels.txt" - fh = fopen(outfile, "w") - for (i=1; i<=rows(labels_out); i++) { - fput(fh, labels_out[i]) - } - fclose(fh) -end - - - * Import cleaned labels into Stata - import delimited "$dir_results/temp_labels.txt", clear varnames(1) /// - encoding(utf8) - gen n = _n - - * Export labels to Excel - putexcel set "$dir_results/reg_home_ownership", sheet("HO1") modify - - * Vertical labels - summarize n, meanonly - local N = r(max)+1 - forvalue i = 2/`N' { - - local j = `i' - 1 - putexcel A`i' = v1[`j'] - - } - - * Horizontal labels - summarize n, meanonly - local N = r(max) + 1 // Adjusted since we're working across columns - - forvalues j = 1/`N' { - - local n = `j'+2 // Shift by 2 to start from column C - local col "" - - while `n' > 0 { - local rem = mod(`n' - 1, 26) - local col = char(65 + `rem') + "`col'" - local n = floor((`n' - 1)/26) - } - - putexcel `col'1 = v1[`j'] - } - - * Clean up - cap erase "$dir_results/temp_labels.txt" - -restore - -* Export model fit statistics -putexcel set "$dir_results/reg_home_ownership", sheet("Gof") modify - -putexcel A3 = "HO1 - Home ownership", bold - -putexcel A5 = "Pseudo R-squared" -putexcel B5 = r2_p -putexcel A6 = "N" -putexcel B6 = N_sample -putexcel E5 = "Chi^2" -putexcel F5 = chi2 -putexcel E6 = "Log likelihood" -putexcel F6 = ll - -drop in_sample p -scalar drop r2_p N_sample chi2 ll - capture log close - + + \ No newline at end of file diff --git a/input/InitialPopulations/compile/RegressionEstimates/reg_income.do b/input/InitialPopulations/compile/RegressionEstimates/reg_income.do index 788c10b05..c0e0da4e7 100644 --- a/input/InitialPopulations/compile/RegressionEstimates/reg_income.do +++ b/input/InitialPopulations/compile/RegressionEstimates/reg_income.do @@ -1,9 +1,10 @@ -******************************************************************************** +/******************************************************************************* * PROJECT: SimPaths UK * SECTION: 
Non-employment/non-benefit income * OBJECT: Final Regresion Models -* AUTHORS: Patryk Bronka, Daria Popova, Justin van de Ven -* LAST UPDATE: 21 Jan 2026 DP +* AUTHORS: Patryk Bronka, Daria Popova, Justin van de Ven, +* Ashley Burdett +* LAST UPDATE: 26 Mar 2026 (AB) * COUNTRY: UK * NOTES: Models for split income variable @@ -12,11 +13,8 @@ * * The income do file must be run after * reg_wages.do because it uses predicted wages. -/******************************************************************************* - +******************************************************************************/ -*******************************************************************************/ -******************************************************************************** clear all set more off set mem 200m @@ -24,36 +22,38 @@ set type double //set maxvar 120000 set maxvar 30000 -******************************************************************* + +/********************************* SET LOG FILE *******************************/ cap log close log using "${dir_log}/reg_income.log", replace -******************************************************************* -* Set Excel file -* Info sheet + +/********************************* SET EXCEL FILE *****************************/ + putexcel set "$dir_results/reg_income", sheet("Info") replace -putexcel A1 = "Description:" -putexcel B1 = "This file contains regression estiamtes used by processes I1 (capital income), I2 (private pension, retired last year), I3 (private pension income, not retired last year) " -putexcel A2 = "Authors: Patryk Bronka, Justin Van de Ven, Daria Popova, Aleksandra Kolndrekaj" +putexcel A1 = "Description:", bold +putexcel B1 = "This file contains regression estimates used by processes I1 (capital income), I2 (private pension, retired last year), I3 (private pension income, not retired last year) " +putexcel A2 = "Authors:" +putexcel B2 = "Patryk Bronka, Justin Van de Ven, Daria Popova, Aleksandra Kolndrekaj" putexcel A3 = 
"Last edit: 18 Feb 2026 AK" -putexcel A4 = "Process:", bold -putexcel B4 = "Description:", bold +putexcel A5 = "Process:", bold +putexcel B5 = "Description:", bold -putexcel A6 = "Process I1a" -putexcel B6 = "Logit regression estimates of the probability of receiving capital income " +putexcel A7 = "Process I1a" +putexcel B7 = "Prob. receive capital income " putexcel A8 = "Process I1b" -putexcel B8 = "OLS regression estimates (ihs) capital income amount - who receive capital income" +putexcel B8 = "Capital income amount" -putexcel A10 = "Process I2b" -putexcel B10 = "OLS regression estimates (ihs) private pension income amount - aged 50+ and were retired last yeare" +putexcel A9 = "Process I2b" +putexcel B9 = "Private pension income amount" -putexcel A12 = "Process I3a" -putexcel B12 = "Logit regression estimates of the probability of receiving private pension income - aged 50+ and not a student or retired last year" +putexcel A10 = "Process I3a" +putexcel B10 = "Prob. receive private pension income" -putexcel A14 = "Process I3b" -putexcel B14 = "OLS regression estimates (ihs) private pension income - aged 50+ and not a student or retired last year" +putexcel A11 = "Process I3b" +putexcel B11 = "Private pension income amount" putexcel A17 = "Notes:", bold @@ -62,12 +62,16 @@ putexcel B18 = "Conditions for processes are defined as globals in master.do" putexcel B19 = "Combined former capital income processes I3a and I3b and renamed as I1a and I1b" putexcel B20 = "Income variables are IHS transformed." 
+putexcel set "$dir_results/reg_income", sheet("Gof") modify +putexcel A1 = "Goodness of fit", bold -/**************************************************************/ -* prepare data on real growth of wages -/**************************************************************/ +/********************************* PREPARE DATA *******************************/ -import excel "$dir_external_data/time_series_factor.xlsx", sheet("UK_gdp") firstrow clear // Import real growth index +* Prepare data on real growth of wages +/* +import excel "${dir_external_data}/time_series_factor.xlsx", /// + sheet("UK_gdp") firstrow clear // Import real growth index + rename Year stm rename Value growth gen base_val = growth if stm == 2015 @@ -76,16 +80,16 @@ replace base_val = r(mean) replace growth= growth/base_val drop base_val replace stm = stm - 2000 -save "$dir_external_data\growth_rates", replace - -/********************************* PREPARE DATA *******************************/ +save "$dir_external_data/growth_rates", replace +*/ * Load data use "${estimation_sample2}", clear //panel with predicted wages * Merge in growth rates -merge m:1 stm using "$dir_external_data/growth_rates", keep(3) nogen keepusing(growth) +merge m:1 stm using "$dir_external_data/growth_rates", keep(3) nogen /// + keepusing(growth) * Set data xtset idperson swv @@ -107,8 +111,16 @@ if _rc == 0 { gen Hourly_wage = pred_hourly_wage } +cap drop in_sample +cap drop p + + /********************************** ESTIMATION ********************************/ +* Run Stata programs to produce Excel file +do "${dir_do}/programs.do" + + /*************** I1a: PROBABILITY OF RECEIVEING CAPITAL INCOME ****************/ display "${i1a_if_condition}" @@ -123,243 +135,17 @@ logit receives_ypncp /// l.Ded_Ypncp l.Ded_Yplgrs_dv l2.Ded_Yplgrs_dv l2.Ded_Ypncp /// i.Deh_c4_Low i.Deh_c4_Medium i.Deh_c4_High /// li.Les_c4_Student li.Les_c4_NotEmployed li.Les_c4_Retired /// - li.Dhhtp_c4_CoupleChildren li.Dhhtp_c4_SingleNoChildren 
li.Dhhtp_c4_SingleChildren /// + li.Dhhtp_c4_CoupleChildren li.Dhhtp_c4_SingleNoChildren /// + li.Dhhtp_c4_SingleChildren /// $regions Year_transformed Y2020 Y2021 $ethnicity if /// - ${i1a_if_condition} [pweight = dwt], /// + ${i1a_if_condition} [pw=${weight}], /// vce(cluster idperson) base - - -* Save raw results -matrix results = r(table) -matrix results = results[1..6,1...]' - -putexcel set "$dir_raw_results/income/income", /// - sheet("Process I1") replace -putexcel A3 = matrix(results), names nformat(number_d2) -putexcel J4 = matrix(e(V)) - -outreg2 stats(coef se pval) using /// - "$dir_raw_results/income/Selection_I1a.doc", replace /// -title("Process I1a: Probability Receiving Capital Income") /// - ctitle(Receives capital income) label side dec(2) noparen /// - addstat(R2, e(r2_p), Chi2, e(chi2), Log-likelihood, e(ll)) /// - addnote(`"Note: Regression if condition = (${i1a_if_condition})"') - - -* Save sample inclusion indicator and predicted probabilities -cap drop in_sample -cap drop p -gen in_sample = e(sample) -predict p - -* Save sample for estimates validation -save "$dir_validation_data/I1_selection_sample", replace - -* Store model summary statistics -scalar r2_p = e(r2_p) -scalar N_sample = e(N) -scalar chi2 = e(chi2) -scalar ll = e(ll) - -* Store results in Excel - -* Store estimates in matrices -matrix b = e(b) -matrix V = e(V) - -* Eliminate rows and columns containing zeros (baseline cats) -mata: - // Call matrices into mata - V = st_matrix("V") - b = st_matrix("b") - - // Find which coefficients are nonzero - keep = (b :!= 0) - - // Eliminate zeros - b_trimmed = select(b, keep) - V_trimmed = select(V, keep) - V_trimmed = select(V_trimmed', keep)' - - // Inspection - b_trimmed - V_trimmed - - // Return to Stata - st_matrix("b_trimmed", b_trimmed') - st_matrix("V_trimmed", V_trimmed) - st_matrix("nonzero_b_flag", keep) -end - -* Eigenvalue tests for var-cov invertablility in SimPaths -matrix symeigen X lambda = V_trimmed - -scalar max_eig = 
lambda[1,1] - -scalar min_ratio = lambda[1, colsof(lambda)] / max_eig - -* Outcome of max eigenvalue test -if max_eig < 1.0e-12 { - - display as error "CRITICAL ERROR: Maximum eigenvalue is too small (`max_eig')." - display as error "The Variance-Covariance matrix is likely singular." - exit 999 - -} - -display "Stability Check Passed: Max Eigenvalue is " max_eig - -* Outcome of eigenvalue ratio test -if min_ratio < 1.0e-12 { - display as error "Matrix is ill-conditioned. Min/Max ratio: " min_ratio - exit 506 - -} - -display "Stability Check Passed. Min/Max ratio: " min_ratio - - -* Export into Excel -putexcel set "$dir_results/reg_income", sheet("I1a") modify -putexcel B2 = matrix(b_trimmed) -putexcel C2 = matrix(V_trimmed) - - -* Labels -preserve - -putexcel set "$dir_results/reg_income", sheet("I1a") modify - -putexcel A1 = "REGRESSOR" -putexcel B1 = "COEFFICIENT" - - -* Use Mata to extract nice labels from colstripe of e(b) -local dir_results "$dir_results" -cap erase "$dir_results/temp_labels.txt" - -mata: - // -------------------------------------------------- - // Import objects from Stata - // -------------------------------------------------- - nonzero_b_flag = st_matrix("nonzero_b_flag") - stripe = st_matrixcolstripe("e(b)") - - // Ensure column vector - nonzero_b_flag = nonzero_b_flag' - - // -------------------------------------------------- - // Extract variable names - // -------------------------------------------------- - varnames = stripe[.,2] - - // Keep non-baseline coefficients - varnames_no_bl = select(varnames, nonzero_b_flag :== 1) - - // -------------------------------------------------- - // Clean labels - // -------------------------------------------------- - labels_no_bl = usubinstr(varnames_no_bl, "1.", "", 1) - labels_no_bl = regexr(labels_no_bl, "^_cons", "Constant") - - // Handle lags: L.var -> var_L1 - labels_no_bl = /// - regexm(labels_no_bl, "^L\.") :* /// - (regexr(labels_no_bl, "^L\.", "") :+ "_L1") :+ (!regexm(labels_no_bl, 
"^L\.") :* labels_no_bl) - - // Handle 1L.var - labels_no_bl = /// - regexm(labels_no_bl, "^1L\.") :* /// - (regexr(labels_no_bl, "^1L\.", "") :+ "_L1") :+ /// - (!regexm(labels_no_bl, "^1L\.") :* labels_no_bl) - - // Handle 2L.var - labels_no_bl = /// - regexm(labels_no_bl, "^L2\.") :* /// - (regexr(labels_no_bl, "^L2\.", "") :+ "_L2") :+ /// - (!regexm(labels_no_bl, "^L2\.") :* labels_no_bl) - - // -------------------------------------------------- - // Add header - // -------------------------------------------------- - labels_out = "v1" \ labels_no_bl - - // -------------------------------------------------- - // Write to temp file - // -------------------------------------------------- - outfile = st_local("dir_results") + "/temp_labels.txt" - fh = fopen(outfile, "w") - for (i=1; i<=rows(labels_out); i++) { - fput(fh, labels_out[i]) - } - fclose(fh) -end - - - * Import cleaned labels into Stata - import delimited "$dir_results/temp_labels.txt", clear varnames(1) /// - encoding(utf8) - gen n = _n - - * Export labels to Excel - putexcel set "$dir_results/reg_income", sheet("I1a") modify - - * Vertical labels - summarize n, meanonly - local N = r(max)+1 - forvalue i = 2/`N' { - - local j = `i' - 1 - putexcel A`i' = v1[`j'] - - } - - * Horizontal labels - summarize n, meanonly - local N = r(max) + 1 // Adjusted since we're working across columns - - forvalues j = 1/`N' { - - local n = `j'+2 // Shift by 2 to start from column C - local col "" - - while `n' > 0 { - local rem = mod(`n' - 1, 26) - local col = char(65 + `rem') + "`col'" - local n = floor((`n' - 1)/26) - } - - putexcel `col'1 = v1[`j'] - } - - * Clean up - cap erase "$dir_results/temp_labels.txt" - -restore - -* Export model fit statistics -putexcel set "$dir_results/reg_income", sheet("Gof") modify - -putexcel A3 = /// - "I1a - Receiving capital income ", /// - bold - -putexcel A5 = "Pseudo R-squared" -putexcel B5 = r2_p -putexcel A6 = "N" -putexcel B6 = N_sample -putexcel E5 = "Chi^2" -putexcel F5 = 
chi2 -putexcel E6 = "Log likelihood" -putexcel F6 = ll - - -* Clean up -drop in_sample p -scalar drop _all -matrix drop _all - - + +process_regression, domain("income") process("I1a") sheet("I1a") /// + title("Process I1a: Prob. receive capital income") /// + gofrow(3) goflabel("I1a - Receive capital income ") /// + ifcond("${i1a_if_condition}") probit + /********************** I1b: AMOUNT OF CAPITAL INCOME *************************/ @@ -369,516 +155,45 @@ display "${i1b_if_condition}" reg ypncp i.Dgn c.Dag c.Dag_sq /// i.Deh_c4_Low i.Deh_c4_Medium i.Deh_c4_High /// li.Les_c4_Student li.Les_c4_NotEmployed li.Les_c4_Retired /// - li.Dhhtp_c4_CoupleChildren li.Dhhtp_c4_SingleNoChildren li.Dhhtp_c4_SingleChildren /// - l.Dhe_pcs l.Dhe_mcs /// + li.Dhhtp_c4_CoupleChildren li.Dhhtp_c4_SingleNoChildren /// + li.Dhhtp_c4_SingleChildren l.Dhe_pcs l.Dhe_mcs /// lc.Ypncp l2c.Ypncp lc.Yplgrs_dv l2c.Yplgrs_dv /// Ded_Dgn /*Ded_Dag Ded_Dag_sq*/ /// l.Ded_Ypncp l.Ded_Yplgrs_dv l2.Ded_Yplgrs_dv l2.Ded_Ypncp /// $regions Year_transformed Y2020 Y2021 $ethnicity /// - if ${i1b_if_condition} [pw=dwt], vce(cluster idperson) - - - * Save raw results -matrix results = r(table) -matrix results = results[1..6,1...]' - -putexcel set "$dir_raw_results/income/income", sheet("Process I1b") modify -putexcel A3 = matrix(results), names nformat(number_d2) -putexcel J4 = matrix(e(V)) - -outreg2 stats(coef se pval) using /// - "$dir_raw_results/income/Amount_I1b.doc", replace /// -title("Process I1b: Capital Income Amount") /// - ctitle(Capital amount) label side dec(2) noparen /// - addstat("R2", e(r2)) /// - addnote(`"Note: Regression if condition = (${i1b_if_condition})"') - - -* Save sample inclusion indicator and predicted probabilities -cap drop in_sample -cap drop p -gen in_sample = e(sample) -predict p -cap drop sigma -gen sigma = e(rmse) - -* Save sample for estimate validation -save "$dir_validation_data/I1_level_sample", replace - -* Store model summary statistics -scalar r2 = e(r2) 
-scalar N_sample = e(N) -scalar chi2 = e(chi2) -scalar ll = e(ll) - -* Store results in Excel - -* Store estimates in matrices -matrix b = e(b) -matrix V = e(V) - -* Eliminate rows and columns containing zeros (baseline cats) -mata: - // Call matrices into mata - V = st_matrix("V") - b = st_matrix("b") - - // Find which coefficients are nonzero - keep = (b :!= 0) - - // Eliminate zeros - b_trimmed = select(b, keep) - V_trimmed = select(V, keep) - V_trimmed = select(V_trimmed', keep)' - - // Inspection - b_trimmed - V_trimmed - - // Return to Stata - st_matrix("b_trimmed", b_trimmed') - st_matrix("V_trimmed", V_trimmed) - st_matrix("nonzero_b_flag", keep) -end - -* Eigenvalue tests for var-cov invertablility in SimPaths -matrix symeigen X lambda = V_trimmed - -scalar max_eig = lambda[1,1] - -scalar min_ratio = lambda[1, colsof(lambda)] / max_eig - -* Outcome of max eigenvalue test -if max_eig < 1.0e-12 { - - display as error "CRITICAL ERROR: Maximum eigenvalue is too small (`max_eig')." - display as error "The Variance-Covariance matrix is likely singular." - exit 999 - -} - -display "Stability Check Passed: Max Eigenvalue is " max_eig - -* Outcome of eigenvalue ratio test -if min_ratio < 1.0e-12 { - - display as error "Matrix is ill-conditioned. Min/Max ratio: " min_ratio - exit 506 - -} - -display "Stability Check Passed. 
Min/Max ratio: " min_ratio - - -* Export into Excel -putexcel set "$dir_results/reg_income", sheet("I1b") modify -putexcel B2 = matrix(b_trimmed) -putexcel C2 = matrix(V_trimmed) - - -* Labels -preserve - -putexcel set "$dir_results/reg_income", sheet("I1b") modify - -putexcel A1 = "REGRESSOR" -putexcel B1 = "COEFFICIENT" - - -* Use Mata to extract nice labels from colstripe of e(b) -local dir_results "$dir_results" -cap erase "$dir_results/temp_labels.txt" - -mata: - // -------------------------------------------------- - // Import objects from Stata - // -------------------------------------------------- - nonzero_b_flag = st_matrix("nonzero_b_flag") - stripe = st_matrixcolstripe("e(b)") - - // Ensure column vector - nonzero_b_flag = nonzero_b_flag' - - // -------------------------------------------------- - // Extract variable names - // -------------------------------------------------- - varnames = stripe[.,2] - - // Keep non-baseline coefficients - varnames_no_bl = select(varnames, nonzero_b_flag :== 1) - - // -------------------------------------------------- - // Clean labels - // -------------------------------------------------- - labels_no_bl = usubinstr(varnames_no_bl, "1.", "", 1) - labels_no_bl = regexr(labels_no_bl, "^_cons", "Constant") - - // Handle lags: L.var -> var_L1 - labels_no_bl = /// - regexm(labels_no_bl, "^L\.") :* /// - (regexr(labels_no_bl, "^L\.", "") :+ "_L1") :+ (!regexm(labels_no_bl, "^L\.") :* labels_no_bl) - - // Handle 1L.var - labels_no_bl = /// - regexm(labels_no_bl, "^1L\.") :* /// - (regexr(labels_no_bl, "^1L\.", "") :+ "_L1") :+ /// - (!regexm(labels_no_bl, "^1L\.") :* labels_no_bl) - - // Handle 2L.var - labels_no_bl = /// - regexm(labels_no_bl, "^L2\.") :* /// - (regexr(labels_no_bl, "^L2\.", "") :+ "_L2") :+ /// - (!regexm(labels_no_bl, "^L2\.") :* labels_no_bl) - - // -------------------------------------------------- - // Add header - // -------------------------------------------------- - labels_out = "v1" \ 
labels_no_bl - - // -------------------------------------------------- - // Write to temp file - // -------------------------------------------------- - outfile = st_local("dir_results") + "/temp_labels.txt" - fh = fopen(outfile, "w") - for (i=1; i<=rows(labels_out); i++) { - fput(fh, labels_out[i]) - } - fclose(fh) -end - - - * Import cleaned labels into Stata - import delimited "$dir_results/temp_labels.txt", clear varnames(1) /// - encoding(utf8) - gen n = _n - - * Export labels to Excel - putexcel set "$dir_results/reg_income", sheet("I1b") modify - - * Vertical labels - summarize n, meanonly - local N = r(max)+1 - forvalue i = 2/`N' { - - local j = `i' - 1 - putexcel A`i' = v1[`j'] - - } - - * Horizontal labels - summarize n, meanonly - local N = r(max) + 1 // Adjusted since we're working across columns - - forvalues j = 1/`N' { - - local n = `j'+2 // Shift by 2 to start from column C - local col "" - - while `n' > 0 { - local rem = mod(`n' - 1, 26) - local col = char(65 + `rem') + "`col'" - local n = floor((`n' - 1)/26) - } - - putexcel `col'1 = v1[`j'] - } - - * Clean up - cap erase "$dir_results/temp_labels.txt" - -restore - - - -* Calculate RMSE -cap drop residuals squared_residuals -predict residuals , residuals -gen squared_residuals = residuals^2 - -preserve -keep if receives_ypncp == 1 -sum squared_residuals [w = dwt] -di "RMSE for Amount of capital income" sqrt(r(mean)) -putexcel set "$dir_results/reg_RMSE.xlsx", sheet("UK") modify -putexcel A6 = ("I1b") B6 = (sqrt(r(mean))) -restore - -* Export model fit statistics -putexcel set "$dir_results/reg_income", sheet("Gof") modify - -putexcel A9 = "I1b - Capital income amount", /// - bold - -putexcel A11 = "R-squared" -putexcel B11 = r2 -putexcel A12 = "N" -putexcel B12 = N_sample - -* Clean up -drop in_sample p -scalar drop _all -matrix drop _all - + if ${i1b_if_condition} [pw=${weight}], vce(cluster idperson) +process_regression, domain("income") process("I1b") sheet("I1b") /// + title("Process I1b: 
Amount of capital income") /// + gofrow(7) goflabel("I1b - Amount of capital income") /// + ifcond("${i1b_if_condition}") + -/******************************* I2b: Amount of pension income *********************************************/ +/****************** I2b: AMOUNT OF PENSION INCOME, RETIRED L1 *****************/ *Sample: Retired individuals who were retired in the previous year. -*ypnoab = Inverse hyperbolic sine transformation of Gross personal private pension income +*ypnoab = Inverse hyperbolic sine transformation of Gross personal private +* pension income display "${i2b_if_condition}" reg ypnoab i.Dgn c.Dag /// i.Deh_c4_High i.Deh_c4_Medium i.Deh_c4_Na /// - li.Dhhtp_c4_CoupleChildren li.Dhhtp_c4_SingleNoChildren li.Dhhtp_c4_SingleChildren /// + li.Dhhtp_c4_CoupleChildren li.Dhhtp_c4_SingleNoChildren /// + li.Dhhtp_c4_SingleChildren /// l.Dhe_pcs l.Dhe_mcs /// lc.Ypnoab l2c.Ypnoab /// $regions Year_transformed Y2020 Y2021 $ethnicity /// - if ${i2b_if_condition} [pw=dwt], vce(cluster idperson) - - * Save raw results -matrix results = r(table) -matrix results = results[1..6,1...]' - -putexcel set "$dir_raw_results/income/income", sheet("Process I2b") modify -putexcel A3 = matrix(results), names nformat(number_d2) -putexcel J4 = matrix(e(V)) - -outreg2 stats(coef se pval) using /// - "$dir_raw_results/income/Amount_I2b.doc", replace /// -title("Process I2b: Capital Income Amount") /// - ctitle(Private Pension Income amount) label side dec(2) noparen /// - addstat("R2", e(r2)) /// - addnote(`"Note: Regression if condition = (${i2b_if_condition})"') - - -* Save sample inclusion indicator and predicted probabilities -cap drop in_sample -cap drop p -gen in_sample = e(sample) -predict p -cap drop sigma -gen sigma = e(rmse) - -* Save sample for estimate validation -save "$dir_validation_data/I2_level_sample", replace - -* Store model summary statistics -scalar r2 = e(r2) -scalar N_sample = e(N) -scalar chi2 = e(chi2) -scalar ll = e(ll) - -* Store results in Excel - 
-* Store estimates in matrices -matrix b = e(b) -matrix V = e(V) - -* Eliminate rows and columns containing zeros (baseline cats) -mata: - // Call matrices into mata - V = st_matrix("V") - b = st_matrix("b") - - // Find which coefficients are nonzero - keep = (b :!= 0) - - // Eliminate zeros - b_trimmed = select(b, keep) - V_trimmed = select(V, keep) - V_trimmed = select(V_trimmed', keep)' - - // Inspection - b_trimmed - V_trimmed - - // Return to Stata - st_matrix("b_trimmed", b_trimmed') - st_matrix("V_trimmed", V_trimmed) - st_matrix("nonzero_b_flag", keep) -end - -* Eigenvalue tests for var-cov invertablility in SimPaths -matrix symeigen X lambda = V_trimmed - -scalar max_eig = lambda[1,1] - -scalar min_ratio = lambda[1, colsof(lambda)] / max_eig - -* Outcome of max eigenvalue test -if max_eig < 1.0e-12 { - - display as error "CRITICAL ERROR: Maximum eigenvalue is too small (`max_eig')." - display as error "The Variance-Covariance matrix is likely singular." - exit 999 - -} - -display "Stability Check Passed: Max Eigenvalue is " max_eig - -* Outcome of eigenvalue ratio test -if min_ratio < 1.0e-12 { - - display as error "Matrix is ill-conditioned. Min/Max ratio: " min_ratio - exit 506 - -} - -display "Stability Check Passed. 
Min/Max ratio: " min_ratio - - -* Export into Excel -putexcel set "$dir_results/reg_income", sheet("I2b") modify -putexcel B2 = matrix(b_trimmed) -putexcel C2 = matrix(V_trimmed) - - -* Labels -preserve - -putexcel set "$dir_results/reg_income", sheet("I2b") modify - -putexcel A1 = "REGRESSOR" -putexcel B1 = "COEFFICIENT" - - -* Use Mata to extract nice labels from colstripe of e(b) -local dir_results "$dir_results" -cap erase "$dir_results/temp_labels.txt" - -mata: - // -------------------------------------------------- - // Import objects from Stata - // -------------------------------------------------- - nonzero_b_flag = st_matrix("nonzero_b_flag") - stripe = st_matrixcolstripe("e(b)") - - // Ensure column vector - nonzero_b_flag = nonzero_b_flag' - - // -------------------------------------------------- - // Extract variable names - // -------------------------------------------------- - varnames = stripe[.,2] - - // Keep non-baseline coefficients - varnames_no_bl = select(varnames, nonzero_b_flag :== 1) - - // -------------------------------------------------- - // Clean labels - // -------------------------------------------------- - labels_no_bl = usubinstr(varnames_no_bl, "1.", "", 1) - labels_no_bl = regexr(labels_no_bl, "^_cons", "Constant") - - // Handle lags: L.var -> var_L1 - labels_no_bl = /// - regexm(labels_no_bl, "^L\.") :* /// - (regexr(labels_no_bl, "^L\.", "") :+ "_L1") :+ (!regexm(labels_no_bl, "^L\.") :* labels_no_bl) - - // Handle 1L.var - labels_no_bl = /// - regexm(labels_no_bl, "^1L\.") :* /// - (regexr(labels_no_bl, "^1L\.", "") :+ "_L1") :+ /// - (!regexm(labels_no_bl, "^1L\.") :* labels_no_bl) - - // Handle 2L.var - labels_no_bl = /// - regexm(labels_no_bl, "^L2\.") :* /// - (regexr(labels_no_bl, "^L2\.", "") :+ "_L2") :+ /// - (!regexm(labels_no_bl, "^L2\.") :* labels_no_bl) - - // -------------------------------------------------- - // Add header - // -------------------------------------------------- - labels_out = "v1" \ 
labels_no_bl - - // -------------------------------------------------- - // Write to temp file - // -------------------------------------------------- - outfile = st_local("dir_results") + "/temp_labels.txt" - fh = fopen(outfile, "w") - for (i=1; i<=rows(labels_out); i++) { - fput(fh, labels_out[i]) - } - fclose(fh) -end - - - * Import cleaned labels into Stata - import delimited "$dir_results/temp_labels.txt", clear varnames(1) /// - encoding(utf8) - gen n = _n - - * Export labels to Excel - putexcel set "$dir_results/reg_income", sheet("I2b") modify - - * Vertical labels - summarize n, meanonly - local N = r(max)+1 - forvalue i = 2/`N' { - - local j = `i' - 1 - putexcel A`i' = v1[`j'] - - } - - * Horizontal labels - summarize n, meanonly - local N = r(max) + 1 // Adjusted since we're working across columns - - forvalues j = 1/`N' { - - local n = `j'+2 // Shift by 2 to start from column C - local col "" - - while `n' > 0 { - local rem = mod(`n' - 1, 26) - local col = char(65 + `rem') + "`col'" - local n = floor((`n' - 1)/26) - } - - putexcel `col'1 = v1[`j'] - } - - * Clean up - cap erase "$dir_results/temp_labels.txt" - -restore - - -* Calculate RMSE -cap drop residuals squared_residuals -predict residuals , residuals -gen squared_residuals = residuals^2 - -preserve -keep if receives_ypncp == 1 -sum squared_residuals [w = dwt] -di "RMSE for Amount of private pension income" sqrt(r(mean)) -putexcel set "$dir_results/reg_RMSE.xlsx", sheet("UK") modify -putexcel A7 = ("I2b") B7 = (sqrt(r(mean))) -restore - -* Export model fit statistics -putexcel set "$dir_results/reg_income", sheet("Gof") modify - -putexcel A15 = /// - "I2b - Private Pension income amount", /// - bold - -putexcel A17 = "R-squared" -putexcel B17 = r2 -putexcel A18 = "N" -putexcel B18 = N_sample - -* Clean up -drop in_sample p -scalar drop _all -matrix drop _all + if ${i2b_if_condition} [pw=${weight}], vce(cluster idperson) +process_regression, domain("income") process("I2b") sheet("I2b") /// + 
title("Process I2b: Amount of private pension income, retired L1") /// + gofrow(11) goflabel("I2b - Amount of private pension income") /// + ifcond("${i2b_if_condition}") + -/*************************** I3a: PROBABILITY OF RECEIVEING PRIVATE PENSION INCOME ***********************************/ +/**** I3a: PROBABILITY OF RECEIVING PRIVATE PENSION INCOME, NOT RETIRED L1 ****/ + *Sample: Retired individuals who were not retired in the previous year. display "${i3a_if_condition}" @@ -887,501 +202,44 @@ logit receives_ypnoab /// i.Dgn i.Reached_Retirement_Age /// i.Deh_c4_High i.Deh_c4_Medium i.Deh_c4_Na /// li.Les_c4_NotEmployed /// - li.Dhhtp_c4_CoupleChildren li.Dhhtp_c4_SingleNoChildren li.Dhhtp_c4_SingleChildren /// + li.Dhhtp_c4_CoupleChildren li.Dhhtp_c4_SingleNoChildren /// + li.Dhhtp_c4_SingleChildren /// l.Dhe_pcs l.Dhe_mcs /// l.Hourly_wage /// $regions Year_transformed Y2020 Y2021 $ethnicity if /// - ${i3a_if_condition} [pweight = dwt], vce(cluster idperson) base - - -* Save raw results -matrix results = r(table) -matrix results = results[1..6,1...]' - -putexcel set "$dir_raw_results/income/income", /// - sheet("Pension Income selection") replace -putexcel A3 = matrix(results), names nformat(number_d2) -putexcel J4 = matrix(e(V)) - -outreg2 stats(coef se pval) using /// - "$dir_raw_results/income/Selection_I3a.doc", replace /// - title("Process I3a: Probability Receiving Private Pension Income") /// - ctitle(Receives private pesnion income) label side dec(2) noparen /// - addstat(R2, e(r2_p), Chi2, e(chi2), Log-likelihood, e(ll)) /// - addnote(`"Note: Regression if condition = (${i3a_if_condition})"') - -* Save sample inclusion indicator and predicted probabilities -cap drop in_sample -cap drop p -gen in_sample = e(sample) -predict p - -* Save sample for estimates validation -save "$dir_validation_data/I3_selection_sample", replace - -* Store model summary statistics -scalar r2_p = e(r2_p) -scalar N_sample = e(N) -scalar chi2 = e(chi2) -scalar ll = e(ll) - -* 
Store results in Excel - -* Store estimates in matrices -matrix b = e(b) -matrix V = e(V) - -* Eliminate rows and columns containing zeros (baseline cats) -mata: - // Call matrices into mata - V = st_matrix("V") - b = st_matrix("b") - - // Find which coefficients are nonzero - keep = (b :!= 0) - - // Eliminate zeros - b_trimmed = select(b, keep) - V_trimmed = select(V, keep) - V_trimmed = select(V_trimmed', keep)' - - // Inspection - b_trimmed - V_trimmed - - // Return to Stata - st_matrix("b_trimmed", b_trimmed') - st_matrix("V_trimmed", V_trimmed) - st_matrix("nonzero_b_flag", keep) -end - -* Eigenvalue tests for var-cov invertablility in SimPaths -matrix symeigen X lambda = V_trimmed - -scalar max_eig = lambda[1,1] - -scalar min_ratio = lambda[1, colsof(lambda)] / max_eig - -* Outcome of max eigenvalue test -if max_eig < 1.0e-12 { - - display as error "CRITICAL ERROR: Maximum eigenvalue is too small (`max_eig')." - display as error "The Variance-Covariance matrix is likely singular." - exit 999 - -} - -display "Stability Check Passed: Max Eigenvalue is " max_eig - -* Outcome of eigenvalue ratio test -if min_ratio < 1.0e-12 { - display as error "Matrix is ill-conditioned. Min/Max ratio: " min_ratio - exit 506 - -} - -display "Stability Check Passed. 
Min/Max ratio: " min_ratio - -* Export into Excel -putexcel set "$dir_results/reg_income", sheet("I3a") modify -putexcel B2 = matrix(b_trimmed) -putexcel C2 = matrix(V_trimmed) - - -* Labels -preserve - -putexcel set "$dir_results/reg_income", sheet("I3a") modify - -putexcel A1 = "REGRESSOR" -putexcel B1 = "COEFFICIENT" - - -* Use Mata to extract nice labels from colstripe of e(b) -local dir_results "$dir_results" -cap erase "$dir_results/temp_labels.txt" - -mata: - // -------------------------------------------------- - // Import objects from Stata - // -------------------------------------------------- - nonzero_b_flag = st_matrix("nonzero_b_flag") - stripe = st_matrixcolstripe("e(b)") - - // Ensure column vector - nonzero_b_flag = nonzero_b_flag' - - // -------------------------------------------------- - // Extract variable names - // -------------------------------------------------- - varnames = stripe[.,2] - - // Keep non-baseline coefficients - varnames_no_bl = select(varnames, nonzero_b_flag :== 1) - - // -------------------------------------------------- - // Clean labels - // -------------------------------------------------- - labels_no_bl = usubinstr(varnames_no_bl, "1.", "", 1) - labels_no_bl = regexr(labels_no_bl, "^_cons", "Constant") - - // Handle lags: L.var -> var_L1 - labels_no_bl = /// - regexm(labels_no_bl, "^L\.") :* /// - (regexr(labels_no_bl, "^L\.", "") :+ "_L1") :+ (!regexm(labels_no_bl, "^L\.") :* labels_no_bl) - - // Handle 1L.var - labels_no_bl = /// - regexm(labels_no_bl, "^1L\.") :* /// - (regexr(labels_no_bl, "^1L\.", "") :+ "_L1") :+ /// - (!regexm(labels_no_bl, "^1L\.") :* labels_no_bl) - - // Handle 2L.var - labels_no_bl = /// - regexm(labels_no_bl, "^L2\.") :* /// - (regexr(labels_no_bl, "^L2\.", "") :+ "_L2") :+ /// - (!regexm(labels_no_bl, "^L2\.") :* labels_no_bl) - - // -------------------------------------------------- - // Add header - // -------------------------------------------------- - labels_out = "v1" \ labels_no_bl 
- - // -------------------------------------------------- - // Write to temp file - // -------------------------------------------------- - outfile = st_local("dir_results") + "/temp_labels.txt" - fh = fopen(outfile, "w") - for (i=1; i<=rows(labels_out); i++) { - fput(fh, labels_out[i]) - } - fclose(fh) -end - - - * Import cleaned labels into Stata - import delimited "$dir_results/temp_labels.txt", clear varnames(1) /// - encoding(utf8) - gen n = _n - - * Export labels to Excel - putexcel set "$dir_results/reg_income", sheet("I3a") modify - - * Vertical labels - summarize n, meanonly - local N = r(max)+1 - forvalue i = 2/`N' { + ${i3a_if_condition} [pw=${weight}], vce(cluster idperson) base - local j = `i' - 1 - putexcel A`i' = v1[`j'] +process_regression, domain("income") process("I3a") sheet("I3a") /// + title("Process I3a: Prob. receive private pension income, not retired L1") /// + gofrow(15) goflabel("I3a - Receive private pension income ") /// + ifcond("${i3a_if_condition}") probit - } - - * Horizontal labels - summarize n, meanonly - local N = r(max) + 1 // Adjusted since we're working across columns - - forvalues j = 1/`N' { - - local n = `j'+2 // Shift by 2 to start from column C - local col "" - - while `n' > 0 { - local rem = mod(`n' - 1, 26) - local col = char(65 + `rem') + "`col'" - local n = floor((`n' - 1)/26) - } - - putexcel `col'1 = v1[`j'] - } - - * Clean up - cap erase "$dir_results/temp_labels.txt" - -restore - -* Export model fit statistics -putexcel set "$dir_results/reg_income", sheet("Gof") modify - -putexcel A21 = /// - "I3a - Receiving private pension income", /// - bold -putexcel A23 = "Pseudo R-squared" -putexcel B23 = r2_p -putexcel A24 = "N" -putexcel B24 = N_sample -putexcel E23 = "Chi^2" -putexcel F23 = chi2 -putexcel E24 = "Log likelihood" -putexcel F24 = ll - - -* Clean up -drop in_sample p -scalar drop _all -matrix drop _all - -/***************************** I3b: Amount of pension income ********************************************/
+/******************* I3b: AMOUNT PRIVATE PENSION, NOT RETIRED L1 **************/ *Sample: Retired individuals who were not retired in the previous year. -*ypnoab = Inverse hyperbolic sine transformation of Gross personal private pension income +*ypnoab = Inverse hyperbolic sine transformation of Gross personal private +*pension income display "${i3b_if_condition}" reg ypnoab i.Dgn c.Dag /// i.Deh_c4_High i.Deh_c4_Medium i.Deh_c4_Na /// li.Les_c4_NotEmployed /// - li.Dhhtp_c4_CoupleChildren li.Dhhtp_c4_SingleNoChildren li.Dhhtp_c4_SingleChildren /// + li.Dhhtp_c4_CoupleChildren li.Dhhtp_c4_SingleNoChildren /// + li.Dhhtp_c4_SingleChildren /// l.Dhe_pcs l.Dhe_mcs /// l.Hourly_wage /// $regions Year_transformed Y2020 Y2021 $ethnicity /// - if ${i3b_if_condition} [pw=dwt], vce(cluster idperson) - - - * Save raw results -matrix results = r(table) -matrix results = results[1..6,1...]' - -putexcel set "$dir_raw_results/income/income", sheet("Process I3b") modify -putexcel A3 = matrix(results), names nformat(number_d2) -putexcel J4 = matrix(e(V)) - -outreg2 stats(coef se pval) using /// - "$dir_raw_results/income/Amount_I3b.doc", replace /// -title("Process I3b: Private Pension Income Amount") /// - ctitle(Private Pension Income amount) label side dec(2) noparen /// - addstat("R2", e(r2)) /// - addnote(`"Note: Regression if condition = (${i3b_if_condition})"') - - -* Save sample inclusion indicator and predicted probabilities -cap drop in_sample -cap drop p -gen in_sample = e(sample) -predict p -cap drop sigma -gen sigma = e(rmse) - -* Save sample for estimate validation -save "$dir_validation_data/I3_level_sample", replace - -* Store model summary statistics -scalar r2 = e(r2) -scalar N_sample = e(N) -scalar chi2 = e(chi2) -scalar ll = e(ll) - -* Store results in Excel - -* Store estimates in matrices -matrix b = e(b) -matrix V = e(V) - -* Eliminate rows and columns containing zeros (baseline cats) -mata: - // Call matrices into mata - V = st_matrix("V") - b = 
st_matrix("b") - - // Find which coefficients are nonzero - keep = (b :!= 0) - - // Eliminate zeros - b_trimmed = select(b, keep) - V_trimmed = select(V, keep) - V_trimmed = select(V_trimmed', keep)' - - // Inspection - b_trimmed - V_trimmed - - // Return to Stata - st_matrix("b_trimmed", b_trimmed') - st_matrix("V_trimmed", V_trimmed) - st_matrix("nonzero_b_flag", keep) -end - -* Eigenvalue tests for var-cov invertablility in SimPaths -matrix symeigen X lambda = V_trimmed - -scalar max_eig = lambda[1,1] - -scalar min_ratio = lambda[1, colsof(lambda)] / max_eig - -* Outcome of max eigenvalue test -if max_eig < 1.0e-12 { - - display as error "CRITICAL ERROR: Maximum eigenvalue is too small (`max_eig')." - display as error "The Variance-Covariance matrix is likely singular." - exit 999 - -} - -display "Stability Check Passed: Max Eigenvalue is " max_eig - -* Outcome of eigenvalue ratio test -if min_ratio < 1.0e-12 { + if ${i3b_if_condition} [pw=${weight}], vce(cluster idperson) - display as error "Matrix is ill-conditioned. Min/Max ratio: " min_ratio - exit 506 - -} - -display "Stability Check Passed. 
Min/Max ratio: " min_ratio - - -* Export into Excel -putexcel set "$dir_results/reg_income", sheet("I3b") modify -putexcel B2 = matrix(b_trimmed) -putexcel C2 = matrix(V_trimmed) - - -* Labels -preserve - -putexcel set "$dir_results/reg_income", sheet("I3b") modify - -putexcel A1 = "REGRESSOR" -putexcel B1 = "COEFFICIENT" - - -* Use Mata to extract nice labels from colstripe of e(b) -local dir_results "$dir_results" -cap erase "$dir_results/temp_labels.txt" - -mata: - // -------------------------------------------------- - // Import objects from Stata - // -------------------------------------------------- - nonzero_b_flag = st_matrix("nonzero_b_flag") - stripe = st_matrixcolstripe("e(b)") - - // Ensure column vector - nonzero_b_flag = nonzero_b_flag' - - // -------------------------------------------------- - // Extract variable names - // -------------------------------------------------- - varnames = stripe[.,2] - - // Keep non-baseline coefficients - varnames_no_bl = select(varnames, nonzero_b_flag :== 1) - - // -------------------------------------------------- - // Clean labels - // -------------------------------------------------- - labels_no_bl = usubinstr(varnames_no_bl, "1.", "", 1) - labels_no_bl = regexr(labels_no_bl, "^_cons", "Constant") - - // Handle lags: L.var -> var_L1 - labels_no_bl = /// - regexm(labels_no_bl, "^L\.") :* /// - (regexr(labels_no_bl, "^L\.", "") :+ "_L1") :+ (!regexm(labels_no_bl, "^L\.") :* labels_no_bl) - - // Handle 1L.var - labels_no_bl = /// - regexm(labels_no_bl, "^1L\.") :* /// - (regexr(labels_no_bl, "^1L\.", "") :+ "_L1") :+ /// - (!regexm(labels_no_bl, "^1L\.") :* labels_no_bl) - - // Handle 2L.var - labels_no_bl = /// - regexm(labels_no_bl, "^L2\.") :* /// - (regexr(labels_no_bl, "^L2\.", "") :+ "_L2") :+ /// - (!regexm(labels_no_bl, "^L2\.") :* labels_no_bl) - - // -------------------------------------------------- - // Add header - // -------------------------------------------------- - labels_out = "v1" \ 
labels_no_bl - - // -------------------------------------------------- - // Write to temp file - // -------------------------------------------------- - outfile = st_local("dir_results") + "/temp_labels.txt" - fh = fopen(outfile, "w") - for (i=1; i<=rows(labels_out); i++) { - fput(fh, labels_out[i]) - } - fclose(fh) -end - - - * Import cleaned labels into Stata - import delimited "$dir_results/temp_labels.txt", clear varnames(1) /// - encoding(utf8) - gen n = _n - - * Export labels to Excel - putexcel set "$dir_results/reg_income", sheet("I3b") modify - - * Vertical labels - summarize n, meanonly - local N = r(max)+1 - forvalue i = 2/`N' { - - local j = `i' - 1 - putexcel A`i' = v1[`j'] - - } - - * Horizontal labels - summarize n, meanonly - local N = r(max) + 1 // Adjusted since we're working across columns - - forvalues j = 1/`N' { - - local n = `j'+2 // Shift by 2 to start from column C - local col "" - - while `n' > 0 { - local rem = mod(`n' - 1, 26) - local col = char(65 + `rem') + "`col'" - local n = floor((`n' - 1)/26) - } - - putexcel `col'1 = v1[`j'] - } - - * Clean up - cap erase "$dir_results/temp_labels.txt" - -restore - - -* Calculate RMSE -cap drop residuals squared_residuals -predict residuals , residuals -gen squared_residuals = residuals^2 - -preserve -keep if receives_ypncp == 1 -sum squared_residuals [w = dwt] -di "RMSE for Amount of private pension income" sqrt(r(mean)) -putexcel set "$dir_results/reg_RMSE.xlsx", sheet("UK") modify -putexcel A8 = ("I3b") B8 = (sqrt(r(mean))) -restore - -* Export model fit statistics -putexcel set "$dir_results/reg_income", sheet("Gof") modify - -putexcel A27 = /// - "I3b - Private Pension income amount", /// - bold +process_regression, domain("income") process("I3b") sheet("I3b") /// + title("Process I3b: Amount of private pension income, not retired L1") /// + gofrow(19) goflabel("I3b - Amount of private pension income") /// + ifcond("${i3b_if_condition}") -putexcel A28 = "R-squared" -putexcel B28 = r2 -putexcel
A29 = "N" -putexcel B29 = N_sample - - -* Clean up -drop in_sample p -scalar drop _all -matrix drop _all +display "Income analysis complete!" -//end capture log close diff --git a/input/InitialPopulations/compile/RegressionEstimates/reg_leave_parental_home.do b/input/InitialPopulations/compile/RegressionEstimates/reg_leave_parental_home.do index 87a28dea3..24c893d84 100644 --- a/input/InitialPopulations/compile/RegressionEstimates/reg_leave_parental_home.do +++ b/input/InitialPopulations/compile/RegressionEstimates/reg_leave_parental_home.do @@ -17,25 +17,24 @@ set type double set maxvar 30000 -******************************************************************* +/********************************* SET LOG FILE *******************************/ cap log close log using "${dir_log}/reg_leave_parental_home.log", replace -******************************************************************* -* Set Excel file -* Info sheet +/********************************* SET EXCEL FILE *****************************/ putexcel set "$dir_results/reg_leave_parental_home", sheet("Info") replace -putexcel A1 = "Description:" +putexcel A1 = "Description:", bold putexcel B1 = "Model parameters governing leaving parental home" -putexcel A2 = "Authors: Patryk Bronka, Justin van de Ven, Daria Popova, Aleksandra Kolndrekaj" +putexcel A2 = "Authors:" +putexcel B2 = "Patryk Bronka, Justin van de Ven, Daria Popova, Aleksandra Kolndrekaj" putexcel A3 = "Last edit: 19 Jan 2026 DP" -putexcel A4 = "Process:", bold -putexcel B4 = "Description:", bold -putexcel A5 = "P1a" -putexcel B5 = "Probit regression estimates for leaving the parental home, transitioning out of adult child status" +putexcel A5 = "Process:", bold +putexcel B5 = "Description:", bold +putexcel A6 = "P1a" +putexcel B6 = "Prob. 
leave the parental home, transitioning out of adult child status" putexcel A10 = "Notes:", bold putexcel B10 = "Estimation sample: UK_ipop.dta with grossing up weight dwt" @@ -60,6 +59,10 @@ do "${dir_do}/variable_update.do" /********************************** ESTIMATION ********************************/ +* Run Stata programs to produce Excel file +do "${dir_do}/programs.do" + + /**************** P1: PROBABILITY OF LEAVING THE PARENTAL HOME ****************/ display "${p1_if_condition}" @@ -67,229 +70,15 @@ probit dlftphm i.Dgn Dag Dag_sq li.Deh_c4_Na li.Deh_c4_Medium li.Deh_c4_Low /// li.Les_c3_Student li.Les_c3_NotEmployed /// li.Ydses_c5_Q2 li.Ydses_c5_Q3 li.Ydses_c5_Q4 li.Ydses_c5_Q5 /// $regions Year_transformed Y2020 Y2021 $ethnicity /// - if ${p1_if_condition} [pw=dwt], vce(robust) - - * Save raw results -matrix results = r(table) -matrix results = results[1..6,1...]' - -putexcel set "$dir_raw_results/leave_parental_home/leave_parental_home", /// - sheet("Process P1") replace -putexcel A3 = matrix(results), names nformat(number_d2) -putexcel J4 = matrix(e(V)) + if ${p1_if_condition} [pw=${weight}], vce(robust) -outreg2 stats(coef se pval) using /// - "$dir_raw_results/leave_parental_home/P1.doc", replace /// -title("Process P1: Probability Leave the Parental Home") /// - ctitle(Leave home) label side dec(2) noparen /// - addstat(R2, e(r2_p), Chi2, e(chi2), Log-likelihood, e(ll)) /// - addnote(`"Note: Regression if condition = (${p1_if_condition})"') +process_regression, domain("leave_parental_home") process("P1") sheet("P1") /// + title("Process P1: Prob. 
leave parental home") /// + gofrow(3) goflabel("P1 - Leave parental home") /// + ifcond("${p1_if_condition}") probit -* Save sample inclusion indicator and predicted probabilities -gen in_sample = e(sample) -predict p - -* Save sample for estiamte validation -save "$dir_validation_data/P1_sample", replace -* Store model summary statistics -scalar r2_p = e(r2_p) -scalar N_sample = e(N) -scalar chi2 = e(chi2) -scalar ll = e(ll) - - -* Store results in Excel - -* Store estimates in matrices -matrix b = e(b) -matrix V = e(V) - -* Eliminate rows and columns containing zeros (baseline cats) -mata: - // Call matrices into mata - V = st_matrix("V") - b = st_matrix("b") - - // Find which coefficients are nonzero - keep = (b :!= 0) - - // Eliminate zeros - b_trimmed = select(b, keep) - V_trimmed = select(V, keep) - V_trimmed = select(V_trimmed', keep)' - - // Inspection - b_trimmed - V_trimmed - - // Return to Stata - st_matrix("b_trimmed", b_trimmed') - st_matrix("V_trimmed", V_trimmed) - st_matrix("nonzero_b_flag", keep) -end - - -* Eigenvalue tests for var-cov invertablility in SimPaths -matrix symeigen X lambda = V_trimmed - -scalar max_eig = lambda[1,1] - -scalar min_ratio = lambda[1, colsof(lambda)] / max_eig - -* Outcome of max eigenvalue test -if max_eig < 1.0e-12 { +display "Leaving parental home analysis complete!" - display as error "CRITICAL ERROR: Maximum eigenvalue is too small (`max_eig')." - display as error "The Variance-Covariance matrix is likely singular." - exit 999 - -} - -display "Stability Check Passed: Max Eigenvalue is " max_eig - -* Outcome of eigenvalue ratio test -if min_ratio < 1.0e-12 { - display as error "Matrix is ill-conditioned. Min/Max ratio: " min_ratio - exit 506 - -} - -display "Stability Check Passed. 
Min/Max ratio: " min_ratio - - -* Export into Excel -putexcel set "$dir_results/reg_leave_parental_home", sheet("P1") modify -putexcel B2 = matrix(b_trimmed) -putexcel C2 = matrix(V_trimmed) - - -* Labels -preserve -putexcel set "$dir_results/reg_leave_parental_home", sheet("P1") modify - -putexcel A1 = "REGRESSOR" -putexcel B1 = "COEFFICIENT" - -* Use Mata to extract nice labels from colstripe of e(b) - -local dir_results "$dir_results" -cap erase "$dir_results/temp_labels.txt" - -mata: - // -------------------------------------------------- - // Import objects from Stata - // -------------------------------------------------- - nonzero_b_flag = st_matrix("nonzero_b_flag") - stripe = st_matrixcolstripe("e(b)") - - // Ensure column vector - nonzero_b_flag = nonzero_b_flag' - - // -------------------------------------------------- - // Extract variable names - // -------------------------------------------------- - varnames = stripe[.,2] - - // Keep non-baseline coefficients - varnames_no_bl = select(varnames, nonzero_b_flag :== 1) - - // -------------------------------------------------- - // Clean labels - // -------------------------------------------------- - labels_no_bl = usubinstr(varnames_no_bl, "1.", "", 1) - labels_no_bl = regexr(labels_no_bl, "^_cons", "Constant") - - // Handle lags: L.var -> var_L1 - labels_no_bl = /// - regexm(labels_no_bl, "^L\.") :* /// - (regexr(labels_no_bl, "^L\.", "") :+ "_L1") :+ /// - (!regexm(labels_no_bl, "^L\.") :* labels_no_bl) - - // Handle 1L.var - labels_no_bl = /// - regexm(labels_no_bl, "^1L\.") :* /// - (regexr(labels_no_bl, "^1L\.", "") :+ "_L1") :+ /// - (!regexm(labels_no_bl, "^1L\.") :* labels_no_bl) - - // -------------------------------------------------- - // Add header - // -------------------------------------------------- - labels_out = "v1" \ labels_no_bl - - // -------------------------------------------------- - // Write to temp file - // -------------------------------------------------- - outfile = 
st_local("dir_results") + "/temp_labels.txt" - fh = fopen(outfile, "w") - for (i=1; i<=rows(labels_out); i++) { - fput(fh, labels_out[i]) - } - fclose(fh) -end - - - * Import cleaned labels into Stata - import delimited "$dir_results/temp_labels.txt", clear varnames(1) /// - encoding(utf8) - gen n = _n - - * Export labels to Excel - putexcel set "$dir_results/reg_leave_parental_home", sheet("P1") modify - - * Vertical labels - summarize n, meanonly - local N = r(max)+1 - forvalue i = 2/`N' { - - local j = `i' - 1 - putexcel A`i' = v1[`j'] - - } - - * Horizontal labels - summarize n, meanonly - local N = r(max) + 1 // Adjusted since we're working across columns - - forvalues j = 1/`N' { - - local n = `j'+2 // Shift by 2 to start from column C - local col "" - - while `n' > 0 { - local rem = mod(`n' - 1, 26) - local col = char(65 + `rem') + "`col'" - local n = floor((`n' - 1)/26) - } - - putexcel `col'1 = v1[`j'] - } - - * Clean up - cap erase "$dir_results/temp_labels.txt" - -restore - - -* Export model fit statistics -putexcel set "$dir_results/reg_leave_parental_home", sheet("Gof") modify - -putexcel A3 = "P1 - Leaving the parental home ", bold - -putexcel A5 = "Pseudo R-squared" -putexcel B5 = r2_p -putexcel A6 = "N" -putexcel B6 = N_sample -putexcel E5 = "Chi^2" -putexcel F5 = chi2 -putexcel E6 = "Log likelihood" -putexcel F6 = ll - -* Clean up -drop in_sample p -scalar drop _all -matrix drop _all - - -capture log close +cap log close diff --git a/input/InitialPopulations/compile/RegressionEstimates/reg_partnership.do b/input/InitialPopulations/compile/RegressionEstimates/reg_partnership.do index 1517b9ac3..5981e4a72 100644 --- a/input/InitialPopulations/compile/RegressionEstimates/reg_partnership.do +++ b/input/InitialPopulations/compile/RegressionEstimates/reg_partnership.do @@ -17,27 +17,26 @@ set type double //set maxvar 120000 set maxvar 30000 -******************************************************************* +/********************************* SET LOG 
FILE *******************************/ cap log close log using "${dir_log}/reg_partnership.log", replace -******************************************************************* -* Set Excel file -* Info sheet +/********************************* SET EXCEL FILE *****************************/ putexcel set "$dir_results/reg_partnership", sheet("Info") replace -putexcel A1 = "Description:" +putexcel A1 = "Description:", bold putexcel B1 = "Model parameters for relationship status projection" -putexcel A2 = "Authors: Patryk Bronka, Justin van de Ven, Daria Popova, Aleksandra Kolndrekaj" +putexcel A2 = "Authors:" +putexcel B2 = "Patryk Bronka, Justin van de Ven, Daria Popova, Aleksandra Kolndrekaj" putexcel A3 = "Last edit: 18 Feb 2026 AK" -putexcel A4 = "Process:", bold -putexcel B4 = "Description:", bold -putexcel A5 = "U1" -putexcel B5 = "Probit regression estimates probability of entering a partnership - single respondents aged 18+" -putexcel A6 = "U2" -putexcel B6 = "Probit regression estimates of probability of exiting a partnership - cohabiting women aged 18+" +putexcel A5 = "Process:", bold +putexcel B5 = "Description:", bold +putexcel A6 = "U1 " +putexcel B6 = "Prob enter partnership" +putexcel A7 = "U2" +putexcel B7 = "Prob exit partnership" putexcel A10 = "Notes:", bold putexcel B10 = "Estimation sample: UK_ipop.dta with grossing up weight dwt" @@ -48,7 +47,6 @@ putexcel set "$dir_results/reg_partnership", sheet("Gof") modify putexcel A1 = "Goodness of fit", bold - /********************************* PREPARE DATA *******************************/ * Load data @@ -61,6 +59,8 @@ sort idperson swv * Adjust variables do "${dir_do}/variable_update.do" +* Run Stata programs to produce Excel file +do "${dir_do}/programs.do" /********************************** ESTIMATION ********************************/ @@ -70,242 +70,24 @@ display "${u1_if_condition}" probit dcpen i.Ded Dgn Dag Dag_sq lc.Dnc lc.Dnc02 /// li.Ydses_c5_Q2 li.Ydses_c5_Q3 li.Ydses_c5_Q4 li.Ydses_c5_Q5 /// 
/*Ded_Dag Ded_Dag_sq*/ Ded_Dgn Ded_Dnc_L1 Ded_Dnc02_L1 /// - Ded_Ydses_c5_Q2_L1 Ded_Ydses_c5_Q3_L1 Ded_Ydses_c5_Q4_L1 Ded_Ydses_c5_Q5_L1 /// - i.Deh_c4_Na i.Deh_c4_High i.Deh_c4_Medium i.Deh_c4_Low /// - li.Les_c4_Student li.Les_c4_NotEmployed li.Les_c4_Retired /// + Ded_Ydses_c5_Q2_L1 Ded_Ydses_c5_Q3_L1 Ded_Ydses_c5_Q4_L1 /// + Ded_Ydses_c5_Q5_L1 i.Deh_c4_Na i.Deh_c4_High i.Deh_c4_Medium /// + i.Deh_c4_Low li.Les_c4_Student li.Les_c4_NotEmployed li.Les_c4_Retired /// li.Les_c4_Student_Dgn li.Les_c4_NotEmployed_Dgn /// li.Les_c4_Retired_Dgn /// l.Dhe_pcs l.Dhe_mcs /// $regions Year_transformed Y2020 Y2021 $ethnicity /// - if ${u1_if_condition} [pw=dwt], vce(robust) - - -* Save raw results -matrix results = r(table) -matrix results = results[1..6,1...]' - -putexcel set "$dir_raw_results/partnership/partnership", /// - sheet("Process U1") replace -putexcel A3 = matrix(results), names nformat(number_d2) -putexcel J4 = matrix(e(V)) - -outreg2 stats(coef se pval) using /// - "$dir_raw_results/partnership/U1.doc", replace /// -title("Process U1: Probability Form partnership") /// - ctitle(Form partnership) label side dec(2) noparen /// - addstat(R2, e(r2_p), Chi2, e(chi2), Log-likelihood, e(ll)) /// - addnote(`"Note: Regression if condition = (${u1_if_condition})"') - -* Save sample inclusion indicator and predicted probabilities -gen in_sample = e(sample) -predict p - -* Save sample for later use (internal validation) -save "$dir_validation_data/U1_sample", replace - -* Store model summary statistics -scalar r2_p = e(r2_p) -scalar N_sample = e(N) -scalar chi2 = e(chi2) -scalar ll = e(ll) - -* Store results in Excel - -* Store estimates in matrices -matrix b = e(b) -matrix V = e(V) - -* Eliminate rows and columns containing zeros (baseline cats) -mata: - // Call matrices into mata - V = st_matrix("V") - b = st_matrix("b") - - // Find which coefficients are nonzero - keep = (b :!= 0) - - // Eliminate zeros - b_trimmed = select(b, keep) - V_trimmed = select(V, keep) - 
V_trimmed = select(V_trimmed', keep)' - - // Inspection - b_trimmed - V_trimmed - - // Return to Stata - st_matrix("b_trimmed", b_trimmed') - st_matrix("V_trimmed", V_trimmed) - st_matrix("nonzero_b_flag", keep) -end - - -* Eigenvalue tests for var-cov invertablility in SimPaths -matrix symeigen X lambda = V_trimmed - -scalar max_eig = lambda[1,1] - -scalar min_ratio = lambda[1, colsof(lambda)] / max_eig - -* Outcome of max eigenvalue test -if max_eig < 1.0e-12 { - - display as error "CRITICAL ERROR: Maximum eigenvalue is too small (`max_eig')." - display as error "The Variance-Covariance matrix is likely singular." - exit 999 - -} - -display "Stability Check Passed: Max Eigenvalue is " max_eig - -* Outcome of eigenvalue ratio test -if min_ratio < 1.0e-12 { - display as error "Matrix is ill-conditioned. Min/Max ratio: " min_ratio - exit 506 - -} - -display "Stability Check Passed. Min/Max ratio: " min_ratio - - -* Export into Excel -putexcel set "$dir_results/reg_partnership", sheet("U1") modify -putexcel B2 = matrix(b_trimmed) -putexcel C2 = matrix(V_trimmed) - + if ${u1_if_condition} [pw=${weight}], vce(robust) -* Labels -preserve -putexcel set "$dir_results/reg_partnership", sheet("U1") modify - -putexcel A1 = "REGRESSOR" -putexcel B1 = "COEFFICIENT" - -* Use Mata to extract nice labels from colstripe of e(b) - -local dir_results "$dir_results" -cap erase "$dir_results/temp_labels.txt" - -mata: - // -------------------------------------------------- - // Import objects from Stata - // -------------------------------------------------- - nonzero_b_flag = st_matrix("nonzero_b_flag") - stripe = st_matrixcolstripe("e(b)") - - // Ensure column vector - nonzero_b_flag = nonzero_b_flag' - - // -------------------------------------------------- - // Extract variable names - // -------------------------------------------------- - varnames = stripe[.,2] - - // Keep non-baseline coefficients - varnames_no_bl = select(varnames, nonzero_b_flag :== 1) - - // 
-------------------------------------------------- - // Clean labels - // -------------------------------------------------- - labels_no_bl = usubinstr(varnames_no_bl, "1.", "", 1) - labels_no_bl = regexr(labels_no_bl, "^_cons", "Constant") - - // Handle lags: L.var -> var_L1 - labels_no_bl = /// - regexm(labels_no_bl, "^L\.") :* /// - (regexr(labels_no_bl, "^L\.", "") :+ "_L1") :+ /// - (!regexm(labels_no_bl, "^L\.") :* labels_no_bl) - - // Handle 1L.var - labels_no_bl = /// - regexm(labels_no_bl, "^1L\.") :* /// - (regexr(labels_no_bl, "^1L\.", "") :+ "_L1") :+ /// - (!regexm(labels_no_bl, "^1L\.") :* labels_no_bl) - - // -------------------------------------------------- - // Add header - // -------------------------------------------------- - labels_out = "v1" \ labels_no_bl - - // -------------------------------------------------- - // Write to temp file - // -------------------------------------------------- - outfile = st_local("dir_results") + "/temp_labels.txt" - fh = fopen(outfile, "w") - for (i=1; i<=rows(labels_out); i++) { - fput(fh, labels_out[i]) - } - fclose(fh) -end - - - * Import cleaned labels into Stata - import delimited "$dir_results/temp_labels.txt", clear varnames(1) /// - encoding(utf8) - gen n = _n - - * Export labels to Excel - putexcel set "$dir_results/reg_partnership", sheet("U1") modify - - * Vertical labels - summarize n, meanonly - local N = r(max)+1 - forvalue i = 2/`N' { - - local j = `i' - 1 - putexcel A`i' = v1[`j'] +process_regression, domain("partnership") process("U1") sheet("U1") /// + title("Process U1: Prob. 
form partnership") /// + gofrow(3) goflabel("U1 - Form partnership") /// + ifcond("${u1_if_condition}") probit - } - - * Horizontal labels - summarize n, meanonly - local N = r(max) + 1 // Adjusted since we're working across columns - - forvalues j = 1/`N' { - - local n = `j'+2 // Shift by 2 to start from column C - local col "" - - while `n' > 0 { - local rem = mod(`n' - 1, 26) - local col = char(65 + `rem') + "`col'" - local n = floor((`n' - 1)/26) - } - - putexcel `col'1 = v1[`j'] - } - - * Clean up - cap erase "$dir_results/temp_labels.txt" - -restore - - -* Export model fit statistics -putexcel set "$dir_results/reg_partnership", sheet("Gof") modify - -putexcel A3 = "U1- Partnership formation", bold - -putexcel A5 = "Pseudo R-squared" -putexcel B5 = r2_p -putexcel A6 = "N" -putexcel B6 = N_sample -putexcel E5 = "Chi^2" -putexcel F5 = chi2 -putexcel E6 = "Log likelihood" -putexcel F6 = ll - -* Clean up -drop in_sample p -scalar drop _all -matrix drop _all - /******************* U2: PROBABILITY TERMINATE PARTNERSHIP ********************/ display "${u2_if_condition}" - -* Estimation probit dcpex i.Ded Dag Dag_sq /*Ded_Dag Ded_Dag_sq*/ /// li.Deh_c4_Na li.Deh_c4_Low li.Deh_c4_Medium li.Deh_c4_High /// li.Dehsp_c3_Medium li.Dehsp_c3_Low /// @@ -313,232 +95,20 @@ probit dcpex i.Ded Dag Dag_sq /*Ded_Dag Ded_Dag_sq*/ /// l.Dhe_pcs l.Dhe_mcs /// l.Dhe_pcssp l.Dhe_mcssp /// l.Dcpyy l.New_rel l.Dcpagdf l.Dnc l.Dnc02 /// - li.Lesdf_c4_EmpSpouseNotEmp li.Lesdf_c4_NotEmpSpouseEmp li.Lesdf_c4_BothNotEmployed /// + li.Lesdf_c4_EmpSpouseNotEmp li.Lesdf_c4_NotEmpSpouseEmp /// + li.Lesdf_c4_BothNotEmployed /// l.Ypnbihs_dv l.Ynbcpdf_dv /// $regions Year_transformed Y2020 Y2021 $ethnicity /// - if ${u2_if_condition} [pw=dwt], vce(robust) - - -* Save raw results -matrix results = r(table) -matrix results = results[1..6,1...]' - -putexcel set "$dir_raw_results/partnership/partnership", sheet("Process U2") /// - modify -putexcel A3 = matrix(results), names nformat(number_d2) 
-putexcel J4 = matrix(e(V)) - -outreg2 stats(coef se pval) using /// - "$dir_raw_results/partnership/U2.doc", replace /// -title("Process U2: Probability Terminating Partnership") /// - ctitle(End partnership) label side dec(2) noparen /// - addstat(R2, e(r2_p), Chi2, e(chi2), Log-likelihood, e(ll)) /// - addnote(`"Note: Regression if condition = (${u2_if_condition})"') - -* Save sample inclusion indicator and predicted probabilities -gen in_sample = e(sample) -predict p - -* Save sample for later use (internal validation) -save "$dir_validation_data/U2_sample", replace - -* Store model summary statistics -scalar r2_p = e(r2_p) -scalar N_sample = e(N) -scalar chi2 = e(chi2) -scalar ll = e(ll) - -* Store results in Excel - -* Store estimates in matrices -matrix b = e(b) -matrix V = e(V) - -* Eliminate rows and columns containing zeros (baseline cats) -mata: - // Call matrices into mata - V = st_matrix("V") - b = st_matrix("b") - - // Find which coefficients are nonzero - keep = (b :!= 0) - - // Eliminate zeros - b_trimmed = select(b, keep) - V_trimmed = select(V, keep) - V_trimmed = select(V_trimmed', keep)' - - // Inspection - b_trimmed - V_trimmed - - // Return to Stata - st_matrix("b_trimmed", b_trimmed') - st_matrix("V_trimmed", V_trimmed) - st_matrix("nonzero_b_flag", keep) -end - -* Eigenvalue tests for var-cov invertablility in SimPaths -matrix symeigen X lambda = V_trimmed - -scalar max_eig = lambda[1,1] - -scalar min_ratio = lambda[1, colsof(lambda)] / max_eig - -* Outcome of max eigenvalue test -if max_eig < 1.0e-12 { - - display as error "CRITICAL ERROR: Maximum eigenvalue is too small (`max_eig')." - display as error "The Variance-Covariance matrix is likely singular." - exit 999 - -} - -display "Stability Check Passed: Max Eigenvalue is " max_eig - -* Outcome of eigenvalue ratio test -if min_ratio < 1.0e-12 { - display as error "Matrix is ill-conditioned. Min/Max ratio: " min_ratio - exit 506 - -} - -display "Stability Check Passed. 
Min/Max ratio: " min_ratio - - -* Export into Excel -putexcel set "$dir_results/reg_partnership", sheet("U2") modify -putexcel B2 = matrix(b_trimmed) -putexcel C2 = matrix(V_trimmed) - - -* Labels -preserve -putexcel set "$dir_results/reg_partnership", sheet("U2") modify + if ${u2_if_condition} [pw=${weight}], vce(robust) -putexcel A1 = "REGRESSOR" -putexcel B1 = "COEFFICIENT" - -* Use Mata to extract nice labels from colstripe of e(b) - -local dir_results "$dir_results" -cap erase "$dir_results/temp_labels.txt" - -mata: - // -------------------------------------------------- - // Import objects from Stata - // -------------------------------------------------- - nonzero_b_flag = st_matrix("nonzero_b_flag") - stripe = st_matrixcolstripe("e(b)") - - // Ensure column vector - nonzero_b_flag = nonzero_b_flag' - - // -------------------------------------------------- - // Extract variable names - // -------------------------------------------------- - varnames = stripe[.,2] - - // Keep non-baseline coefficients - varnames_no_bl = select(varnames, nonzero_b_flag :== 1) - - // -------------------------------------------------- - // Clean labels - // -------------------------------------------------- - labels_no_bl = usubinstr(varnames_no_bl, "1.", "", 1) - labels_no_bl = regexr(labels_no_bl, "^_cons", "Constant") - - // Handle lags: L.var -> var_L1 - labels_no_bl = /// - regexm(labels_no_bl, "^L\.") :* /// - (regexr(labels_no_bl, "^L\.", "") :+ "_L1") :+ /// - (!regexm(labels_no_bl, "^L\.") :* labels_no_bl) - - // Handle 1L.var - labels_no_bl = /// - regexm(labels_no_bl, "^1L\.") :* /// - (regexr(labels_no_bl, "^1L\.", "") :+ "_L1") :+ /// - (!regexm(labels_no_bl, "^1L\.") :* labels_no_bl) - - // -------------------------------------------------- - // Add header - // -------------------------------------------------- - labels_out = "v1" \ labels_no_bl - - // -------------------------------------------------- - // Write to temp file - // 
-------------------------------------------------- - outfile = st_local("dir_results") + "/temp_labels.txt" - fh = fopen(outfile, "w") - for (i=1; i<=rows(labels_out); i++) { - fput(fh, labels_out[i]) - } - fclose(fh) -end - - - * Import cleaned labels into Stata - import delimited "$dir_results/temp_labels.txt", clear varnames(1) /// - encoding(utf8) - gen n = _n - - * Export labels to Excel - putexcel set "$dir_results/reg_partnership", sheet("U2") modify - - * Vertical labels - summarize n, meanonly - local N = r(max)+1 - forvalue i = 2/`N' { - - local j = `i' - 1 - putexcel A`i' = v1[`j'] - - } - - * Horizontal labels - summarize n, meanonly - local N = r(max) + 1 // Adjusted since we're working across columns - - forvalues j = 1/`N' { - - local n = `j'+2 // Shift by 2 to start from column C - local col "" +process_regression, domain("partnership") process("U2") sheet("U2") /// + title("Process U2: Prob. end partnership") /// + gofrow(7) goflabel("U2 - End partnership") /// + ifcond("${u2_if_condition}") probit - while `n' > 0 { - local rem = mod(`n' - 1, 26) - local col = char(65 + `rem') + "`col'" - local n = floor((`n' - 1)/26) - } - - putexcel `col'1 = v1[`j'] - } - - * Clean up - cap erase "$dir_results/temp_labels.txt" - -restore - - -* Export model fit statistics -putexcel set "$dir_results/reg_partnership", sheet("Gof") modify - -putexcel A8 = "U2 - Partnership termination", bold + +display "Partnership analysis complete!" 
-putexcel A10 = "Pseudo R-squared" -putexcel B10 = r2_p -putexcel A11 = "N" -putexcel B11 = N_sample -putexcel E10 = "Chi^2" -putexcel F10 = chi2 -putexcel E11 = "Log likelihood" -putexcel F11 = ll - - -* Clean up -drop in_sample p -scalar drop _all -matrix drop _all - capture log close diff --git a/input/InitialPopulations/compile/RegressionEstimates/reg_retirement.do b/input/InitialPopulations/compile/RegressionEstimates/reg_retirement.do index 805836ffa..a6bab84a8 100644 --- a/input/InitialPopulations/compile/RegressionEstimates/reg_retirement.do +++ b/input/InitialPopulations/compile/RegressionEstimates/reg_retirement.do @@ -16,29 +16,30 @@ set type double //set maxvar 120000 set maxvar 30000 -******************************************************************* + +/********************************* SET LOG FILE *******************************/ + cap log close log using "${dir_log}/reg_retirement.log", replace -******************************************************************* -* Set Excel file -* Info sheet +/********************************* SET EXCEL FILE *****************************/ putexcel set "$dir_results/reg_retirement", sheet("Info") replace -putexcel A1 = "Description:" +putexcel A1 = "Description:", bold putexcel B1 = "Model parameters governing projection of retirement" -putexcel A2 = "Authors: Patryk Bronka, Justin van de Ven, Daria Popova, Aleksandra Kolndrekaj" +putexcel A2 = "Authors: " +putexcel B2 = "Patryk Bronka, Justin van de Ven, Daria Popova, Aleksandra Kolndrekaj" putexcel A3 = "Last edit: 26 jan 2026 DP" -putexcel A4 = "Process:", bold -putexcel B4 = "Description:", bold +putexcel A5 = "Process:", bold +putexcel B5 = "Description:", bold -putexcel A5 = "R1a" -putexcel B5 = "Probit regression estimates of the probability of retiring, single individuals aged 50+ not yet retired" +putexcel A6 = "R1a" +putexcel B6 = "Prob of retiring, singles" -putexcel A6 = "R1b" -putexcel B6 = "Probit regression estimates of the probability of retiring, 
cohabiting individuals aged 50+ not yet retired" +putexcel A7 = "R1b" +putexcel B7 = "Prob of retiring, partnered" putexcel A10 = "Notes:", bold //putexcel B10 = "" @@ -58,10 +59,14 @@ sort idperson swv * Adjust variables do "${dir_do}/variable_update.do" - + /********************************** ESTIMATION ********************************/ +* Run Stata programs to produce Excel file +do "${dir_do}/programs.do" + + /****************** R1a: PROBABILITY OF RETIREMENT, SINLGE ********************/ display "${r1a_if_condition}" @@ -72,227 +77,13 @@ probit drtren i.Dgn Dag Dag_sq /// li.Les_c3_NotEmployed /// li.Ydses_c5_Q2 li.Ydses_c5_Q3 li.Ydses_c5_Q4 li.Ydses_c5_Q5 li.Dlltsd01 /// $regions Year_transformed Y2020 Y2021 $ethnicity /// - if ${r1a_if_condition} [pw=dwt], vce(robust) - - -* Save raw results -matrix results = r(table) -matrix results = results[1..6,1...]' - -putexcel set "$dir_raw_results/retirement/retirement", /// - sheet("Process R1a") replace -putexcel A3 = matrix(results), names nformat(number_d2) -putexcel J4 = matrix(e(V)) - -outreg2 stats(coef se pval) using /// - "$dir_raw_results/retirement/R1a.doc", replace /// -title("Process R1a: Probability of Retirement, Single") /// - ctitle(Retire) label side dec(2) noparen /// - addstat(R2, e(r2_p), Chi2, e(chi2), Log-likelihood, e(ll)) /// - addnote(`"Note: Regression if condition = (${r1a_if_condition})"') - -* Save sample inclusion indicator and predicted probabilities -gen in_sample = e(sample) -predict p - -* Save sample for estimte validation -save "$dir_validation_data/R1a_sample", replace - -* Store model summary statistics -scalar r2_p = e(r2_p) -scalar N_sample = e(N) -scalar chi2 = e(chi2) -scalar ll = e(ll) - -* Store results in Excel - -* Store estimates -matrix b = e(b) -matrix V = e(V) - -mata: - // Call matrices into mata - V = st_matrix("V") - b = st_matrix("b") - - // Find which coefficients are nonzero - keep = (b :!= 0) - - // Eliminate zeros - b_trimmed = select(b, keep) - V_trimmed = 
select(V, keep) - V_trimmed = select(V_trimmed', keep)' - - // Inspection - b_trimmed - V_trimmed - - // Return to Stata - st_matrix("b_trimmed", b_trimmed') - st_matrix("V_trimmed", V_trimmed) - st_matrix("nonzero_b_flag", keep) -end - -* Eigenvalue tests for var-cov invertablility in SimPaths -matrix symeigen X lambda = V_trimmed - -scalar max_eig = lambda[1,1] - -scalar min_ratio = lambda[1, colsof(lambda)] / max_eig - -* Outcome of max eigenvalue test -if max_eig < 1.0e-12 { - - display as error "CRITICAL ERROR: Maximum eigenvalue is too small (`max_eig')." - display as error "The Variance-Covariance matrix is likely singular." - exit 999 - -} - -display "Stability Check Passed: Max Eigenvalue is " max_eig - -* Outcome of eigenvalue ratio test -if min_ratio < 1.0e-12 { - - display as error "Matrix is ill-conditioned. Min/Max ratio: " min_ratio - exit 506 - -} - -display "Stability Check Passed. Min/Max ratio: " min_ratio - -* Export into Excel -putexcel set "$dir_results/reg_retirement", sheet("R1a") modify -putexcel B2 = matrix(b_trimmed) -putexcel C2 = matrix(V_trimmed) - -* Labels -preserve -putexcel set "$dir_results/reg_retirement", sheet("R1a") modify - -putexcel A1 = "REGRESSOR" -putexcel B1 = "COEFFICIENT" - -* Use Mata to extract nice labels from colstripe of e(b) - -local dir_results "$dir_results" -cap erase "$dir_results/temp_labels.txt" - -mata: - // -------------------------------------------------- - // Import objects from Stata - // -------------------------------------------------- - nonzero_b_flag = st_matrix("nonzero_b_flag") - stripe = st_matrixcolstripe("e(b)") - - // Ensure column vector - nonzero_b_flag = nonzero_b_flag' - - // -------------------------------------------------- - // Extract variable names - // -------------------------------------------------- - varnames = stripe[.,2] - - // Keep non-baseline coefficients - varnames_no_bl = select(varnames, nonzero_b_flag :== 1) - - // -------------------------------------------------- - 
// Clean labels - // -------------------------------------------------- - labels_no_bl = usubinstr(varnames_no_bl, "1.", "", 1) - labels_no_bl = regexr(labels_no_bl, "^_cons", "Constant") - - // Handle lags: L.var -> var_L1 - labels_no_bl = /// - regexm(labels_no_bl, "^L\.") :* /// - (regexr(labels_no_bl, "^L\.", "") :+ "_L1") :+ /// - (!regexm(labels_no_bl, "^L\.") :* labels_no_bl) - - // Handle 1L.var - labels_no_bl = /// - regexm(labels_no_bl, "^1L\.") :* /// - (regexr(labels_no_bl, "^1L\.", "") :+ "_L1") :+ /// - (!regexm(labels_no_bl, "^1L\.") :* labels_no_bl) - - // -------------------------------------------------- - // Add header - // -------------------------------------------------- - labels_out = "v1" \ labels_no_bl - - // -------------------------------------------------- - // Write to temp file - // -------------------------------------------------- - outfile = st_local("dir_results") + "/temp_labels.txt" - fh = fopen(outfile, "w") - for (i=1; i<=rows(labels_out); i++) { - fput(fh, labels_out[i]) - } - fclose(fh) -end - - - * Import cleaned labels into Stata - import delimited "$dir_results/temp_labels.txt", clear varnames(1) /// - encoding(utf8) - gen n = _n - - * Export labels to Excel - putexcel set "$dir_results/reg_retirement", sheet("R1a") modify - - * Vertical labels - summarize n, meanonly - local N = r(max)+1 - forvalue i = 2/`N' { - - local j = `i' - 1 - putexcel A`i' = v1[`j'] - - } - - * Horizontal labels - summarize n, meanonly - local N = r(max) + 1 // Adjusted since we're working across columns - - forvalues j = 1/`N' { + if ${r1a_if_condition} [pw=${weight}], vce(robust) - local n = `j'+2 // Shift by 2 to start from column C - local col "" - - while `n' > 0 { - local rem = mod(`n' - 1, 26) - local col = char(65 + `rem') + "`col'" - local n = floor((`n' - 1)/26) - } - - putexcel `col'1 = v1[`j'] - } +process_regression, domain("retirement") process("R1a") sheet("R1a") /// + title("Process R1a: Prob. 
retire, singles") /// + gofrow(3) goflabel("R1a - Retire, singles") /// + ifcond("${r1a_if_condition}") probit - * Clean up - cap erase "$dir_results/temp_labels.txt" - -restore - -* Export model fit statistics -putexcel set "$dir_results/reg_retirement", sheet("Gof") modify - -putexcel A3 = "R1a - Retirement single", bold - -putexcel A5 = "Pseudo R-squared" -putexcel B5 = r2_p -putexcel A6 = "N" -putexcel B6 = N_sample -putexcel E5 = "Chi^2" -putexcel F5 = chi2 -putexcel E6 = "Log likelihood" -putexcel F6 = ll - - -* Clean up -drop in_sample p -scalar drop _all -matrix drop _all - - /***************** R1b: PROBABILITY OF RETIREMENT, PARTNERED ******************/ display "${r1b_if_condition}" @@ -305,230 +96,16 @@ probit drtren i.Dgn Dag Dag_sq /// i.Reached_Retirement_Age_Sp /// li.Ydses_c5_Q2 li.Ydses_c5_Q3 li.Ydses_c5_Q4 li.Ydses_c5_Q5 li.Dlltsd01 /// $regions Year_transformed Y2020 Y2021 $ethnicity /// - if ${r1b_if_condition} [pweight = dwt], vce(robust) + if ${r1b_if_condition} [pw=${weight}], vce(robust) +process_regression, domain("retirement") process("R1b") sheet("R1b") /// + title("Process R1b: Prob. 
retire, partnered") /// + gofrow(7) goflabel("R1b - Retire, partnered") /// + ifcond("${r1b_if_condition}") probit -* Save raw results -matrix results = r(table) -matrix results = results[1..6,1...]' - -putexcel set "$dir_raw_results/retirement/retirement", /// - sheet("Process R1b") modify -putexcel A3 = matrix(results), names nformat(number_d2) -putexcel J4 = matrix(e(V)) - -outreg2 stats(coef se pval) using /// - "$dir_raw_results/retirement/R1b.doc", replace /// -title("Process R1b: Probability of Retirement, Partnered") /// - ctitle(Retire) label side dec(2) noparen /// - addstat(R2, e(r2_p), Chi2, e(chi2), Log-likelihood, e(ll)) /// - addnote(`"Note: Regression if condition = (${r1b_if_condition})"') -* Save sample inclusion indicator and predicted probabilities -gen in_sample = e(sample) -predict p - -graph bar (mean) drtren p if in_sample, over(dag, label(labsize(vsmall))) /// - legend(label(1 "observed") label(2 "predicted")) - -graph drop _all - -* Save sample for estiamte validation -save "$dir_validation_data/R1b_sample", replace - -* Store model summary statistics -scalar r2_p = e(r2_p) -scalar N_sample = e(N) -scalar chi2 = e(chi2) -scalar ll = e(ll) - - -* Store results in Excel - -* Store estimates -matrix b = e(b) -matrix V = e(V) - -mata: - // Call matrices into mata - V = st_matrix("V") - b = st_matrix("b") - - // Find which coefficients are nonzero - keep = (b :!= 0) +display "Retirement analysis complete!" 
- // Eliminate zeros - b_trimmed = select(b, keep) - V_trimmed = select(V, keep) - V_trimmed = select(V_trimmed', keep)' - - // Inspection - b_trimmed - V_trimmed - - // Return to Stata - st_matrix("b_trimmed", b_trimmed') - st_matrix("V_trimmed", V_trimmed) - st_matrix("nonzero_b_flag", keep) -end - -* Eigenvalue tests for var-cov invertablility in SimPaths -matrix symeigen X lambda = V_trimmed - -scalar max_eig = lambda[1,1] - -scalar min_ratio = lambda[1, colsof(lambda)] / max_eig - -* Outcome of max eigenvalue test -if max_eig < 1.0e-12 { - - display as error "CRITICAL ERROR: Maximum eigenvalue is too small (`max_eig')." - display as error "The Variance-Covariance matrix is likely singular." - exit 999 - -} - -display "Stability Check Passed: Max Eigenvalue is " max_eig - -* Outcome of eigenvalue ratio test -if min_ratio < 1.0e-12 { - - display as error "Matrix is ill-conditioned. Min/Max ratio: " min_ratio - exit 506 - -} - -display "Stability Check Passed. Min/Max ratio: " min_ratio - -* Export into Excel -putexcel set "$dir_results/reg_retirement", sheet("R1b") modify -putexcel B2 = matrix(b_trimmed) -putexcel C2 = matrix(V_trimmed) - -* Labels -preserve -putexcel set "$dir_results/reg_retirement", sheet("R1b") modify -putexcel A1 = "REGRESSOR" -putexcel B1 = "COEFFICIENT" - -* Use Mata to extract nice labels from colstripe of e(b) - -local dir_results "$dir_results" -cap erase "$dir_results/temp_labels.txt" - -mata: - // -------------------------------------------------- - // Import objects from Stata - // -------------------------------------------------- - nonzero_b_flag = st_matrix("nonzero_b_flag") - stripe = st_matrixcolstripe("e(b)") - - // Ensure column vector - nonzero_b_flag = nonzero_b_flag' - - // -------------------------------------------------- - // Extract variable names - // -------------------------------------------------- - varnames = stripe[.,2] - - // Keep non-baseline coefficients - varnames_no_bl = select(varnames, nonzero_b_flag :== 
1) - - // -------------------------------------------------- - // Clean labels - // -------------------------------------------------- - labels_no_bl = usubinstr(varnames_no_bl, "1.", "", 1) - labels_no_bl = regexr(labels_no_bl, "^_cons", "Constant") - - // Handle lags: L.var -> var_L1 - labels_no_bl = /// - regexm(labels_no_bl, "^L\.") :* /// - (regexr(labels_no_bl, "^L\.", "") :+ "_L1") :+ /// - (!regexm(labels_no_bl, "^L\.") :* labels_no_bl) - - // Handle 1L.var - labels_no_bl = /// - regexm(labels_no_bl, "^1L\.") :* /// - (regexr(labels_no_bl, "^1L\.", "") :+ "_L1") :+ /// - (!regexm(labels_no_bl, "^1L\.") :* labels_no_bl) - - // -------------------------------------------------- - // Add header - // -------------------------------------------------- - labels_out = "v1" \ labels_no_bl - - // -------------------------------------------------- - // Write to temp file - // -------------------------------------------------- - outfile = st_local("dir_results") + "/temp_labels.txt" - fh = fopen(outfile, "w") - for (i=1; i<=rows(labels_out); i++) { - fput(fh, labels_out[i]) - } - fclose(fh) -end - - - * Import cleaned labels into Stata - import delimited "$dir_results/temp_labels.txt", clear varnames(1) /// - encoding(utf8) - gen n = _n - - * Export labels to Excel - putexcel set "$dir_results/reg_retirement", sheet("R1b") modify - - * Vertical labels - summarize n, meanonly - local N = r(max)+1 - forvalue i = 2/`N' { - - local j = `i' - 1 - putexcel A`i' = v1[`j'] - - } - - * Horizontal labels - summarize n, meanonly - local N = r(max) + 1 // Adjusted since we're working across columns - - forvalues j = 1/`N' { - - local n = `j'+2 // Shift by 2 to start from column C - local col "" - while `n' > 0 { - local rem = mod(`n' - 1, 26) - local col = char(65 + `rem') + "`col'" - local n = floor((`n' - 1)/26) - } - - putexcel `col'1 = v1[`j'] - } - - * Clean up - cap erase "$dir_results/temp_labels.txt" - -restore - -* Export model fit statistics -putexcel set 
"$dir_results/reg_retirement", sheet("Gof") modify - -putexcel A9 = "R1b - Retirement partnered", bold - -putexcel A11 = "Pseudo R-squared" -putexcel B11 = r2_p -putexcel A12 = "N" -putexcel B12 = N_sample -putexcel E11 = "Chi^2" -putexcel F11 = chi2 -putexcel E12 = "Log likelihood" -putexcel F12 = ll - -* Clean up -drop in_sample p -scalar drop _all -matrix drop _all -graph drop _all - capture log close diff --git a/input/InitialPopulations/compile/RegressionEstimates/reg_socialcare.do b/input/InitialPopulations/compile/RegressionEstimates/reg_socialcare.do index f58734e0a..9ea9cd764 100644 --- a/input/InitialPopulations/compile/RegressionEstimates/reg_socialcare.do +++ b/input/InitialPopulations/compile/RegressionEstimates/reg_socialcare.do @@ -55,10 +55,10 @@ set type double //set maxvar 120000 set maxvar 30000 -******************************************************************* + +/********************************* SET LOG FILE *******************************/ cap log close log using "${dir_log}/reg_socialcare.log", replace -******************************************************************* /********************************* SET EXCEL FILE *****************************/ @@ -66,9 +66,9 @@ log using "${dir_log}/reg_socialcare.log", replace putexcel set "$dir_results/reg_socialcare", sheet("Info") replace putexcel A1 = "Description:", bold putexcel B1 = "Model parameters for social care module" -putexcel A2 = "Authors:", bold +putexcel A2 = "Authors:" putexcel B2 = "Justin van de Ven, Ashley Burdett, Matteo Richiardi, Daria Popova" -putexcel A3 = "Last edit:", bold +putexcel A3 = "Last edit:" putexcel B3 = "16 Feb 2026 MR (Refactored)" putexcel B3 = "18 Feb 2026 DP (Integrated into the pipeline)" @@ -94,12 +94,9 @@ putexcel set "$dir_results/reg_socialcare", sheet("Gof") modify putexcel A1 = "Goodness of fit", bold +/********************************* PREPARE DATA *******************************/ 
-/*============================================================================== - MAIN ANALYSIS -==============================================================================*/ - -use ${estimation_sample}, clear +use "${estimation_sample}", clear * Time series structure gsort idperson stm @@ -108,27 +105,27 @@ xtset idperson stm * Adjust variables do "${dir_do}/variable_update.do" + +/********************************** ESTIMATION ********************************/ + * Run Stata programs to produce Excel file do "${dir_do}/programs.do" -/*============================================================================== - REGRESSIONS -==============================================================================*/ - * Stats for if conditions -/* + table stm, stat (count NeedCare) stat (mean NeedCare) // [2015, 2022] table stm, stat (count ReceiveCare) stat (mean ReceiveCare) // [2016, 2021] but with significant decrease in 2020 and 2021 table stm, stat (count receive_formal_care) stat (mean receive_formal_care) // [2016, 2021] but with significant decrease in 2020 and 2021 table stm, stat (count receive_informal_care) stat (mean receive_informal_care) // [2016, 2021] but with significant decrease in 2020 and 2021 table stm, stat (count provide_informal_care) stat (mean provide_informal_care) // [2015, 2024] also 2014, but fewer hours */ +/* table stm, c(count NeedCare mean NeedCare) table stm, c(count ReceiveCare mean ReceiveCare) table stm, c(count receive_formal_care mean receive_formal_care) table stm, c(count receive_informal_care mean receive_informal_care) table stm, c(count provide_informal_care mean provide_informal_care) - +*/ /* Age variables (for experimenting -> copy and paste in the specification) Dag Dagsq /// @@ -136,6 +133,7 @@ table stm, c(count provide_informal_care mean provide_informal_care) Age77to78 Age79to80 Age81to82 Age83to84 Age85plus /// */ + /************************ Probit need care (S2a) ******************************/ probit NeedCare 
NeedCare_L1 Dgn /// @@ -145,13 +143,14 @@ probit NeedCare NeedCare_L1 Dgn /// Partnered /// Deh_c4_Medium Deh_c4_Low /// Y2020 Y2021 ${regions} ${ethnicity} /// - if ${s2a_if_condition} [pweight=${weight}], vce(r) + if ${s2a_if_condition} [pw=${weight}], vce(r) -process_regression, process("S2a") sheet("S2a") /// +process_regression, domain("socialcare") process("S2a") sheet("S2a") /// title("Process S2a: Prob. need care") /// gofrow(3) goflabel("S2a - Need care") /// ifcond("${s2a_if_condition}") probit + /************************ Probit receive care (S2b) ***************************/ probit ReceiveCare ReceiveCare_L1 Dgn /// @@ -162,16 +161,15 @@ probit ReceiveCare ReceiveCare_L1 Dgn /// Deh_c4_Medium Deh_c4_Low /// HHincomeQ2 HHincomeQ3 HHincomeQ4 HHincomeQ5 /// Y2020 Y2021 ${regions} ${ethnicity} /// - if ${s2b_if_condition} [pweight=${weight}], vce(r) + if ${s2b_if_condition} [pw=${weight}], vce(r) -process_regression, process("S2b") sheet("S2b") /// +process_regression, domain("socialcare") process("S2b") sheet("S2b") /// title("Process S2b: Prob. receive care") /// gofrow(7) goflabel("S2b - Receive care") /// ifcond("${s2b_if_condition}") probit /************************ Mlogit formal/informal (S2c) ************************/ - /* Informal is base outcome Mixed is 1st outcome @@ -186,342 +184,14 @@ mlogit CareMarket CareMarketFormal_L1 CareMarketInformal_L1 CareMarketMixed_L1 D Deh_c4_Medium Deh_c4_Low /// HHincomeQ2 HHincomeQ3 HHincomeQ4 HHincomeQ5 /// Y2020 Y2021 ${regions} ${ethnicity} /// - if ${s2c_if_condition} [pweight=${weight}], vce(r) base(2) -/* -process_mlogit, process("S2c") sheet("S2c") /// - title("Process S2c: Formal/informal care") /// - gofrow(11) goflabel("S2c - Formal/informal") /// - outcomes(3) ifcond("${s2c_if_condition}") -*/ - -/* DP: Use this routine as program for MLogit does not display labels corectly in Excel ==> to replace by program later on ? 
*/ -* Save raw results -matrix results = r(table) -matrix results = results[1..6,1...]' - -putexcel set "$dir_raw_results/social_care/socialcare", sheet("Process S2c") /// - modify -putexcel A3 = matrix(results), names nformat(number_d2) -putexcel J4 = matrix(e(V)) - - -* Save sample inclusion indicator and predicted probabilities -gen in_sample = e(sample) -predict p1 p2 p3 - -* Save sample for estimates validation -save "$dir_validation_data/S2c_sample", replace - -* Store model summary statistics -scalar r2_p = e(r2_p) -scalar N_sample = e(N) -scalar chi2 = e(chi2) -scalar ll = e(ll) - - -* Store results in Excel - -* Store estimates in matrices -matrix b = e(b) -matrix V = e(V) - -* Raw output -putexcel set "$dir_results/reg_socialcare", sheet("S2c_raw") modify -putexcel A1 = matrix(b'), names nformat(number_d2) -putexcel A1 = "CATEGORY" -putexcel B1 = "REGRESSOR" -putexcel C1 = "COEFFICIENT" - -* Estimated coefficients -scalar no_coefs_all = colsof(b) - -* Eliminate rows and columns containing zeros (baseline cats) -mata: - // Call matrices into mata - b = st_matrix("b") - - // Find which coefficients are nonzero - keep = (b :!= 0) - - // Eliminate zeros - nonzero_b = select(b, keep) - - nonzero_b - - // Return to Stata - st_matrix("nonzero_b", nonzero_b) - st_matrix("nonzero_b_flag", keep) -end - -* Inspect -matrix list b -matrix list nonzero_b -matrix list nonzero_b_flag - -* Save dimensions -scalar no_nonzero_b = colsof(nonzero_b) -scalar no_nonzero_b_per = no_nonzero_b / 2 - -* Address repetition of proportional odds covariates - -* Generate repetition/unique observation flag -mata: - // Import matrices into mata - nonzero_b_mata = st_matrix("nonzero_b") - - // Generate binary vector =1 if coefficient repeated - n = cols(nonzero_b_mata) - repetition_flag = J(n, 1, 0) - - // use tolerance based comparison to avoid precision errors - tol = 1e-8 - - for (i = 1; i <= n; i++) { - for (j = 1; j <= n; j++) { - if (i != j && abs(nonzero_b_mata[i] - 
nonzero_b_mata[j]) < tol) { - repetition_flag[i] = 1 - break - } - } - } - repetition_flag - - // Generate binary vector =1 if coefficient not repeated - unique_flag = 1 :- repetition_flag - - // Return to Stata - st_matrix("repetition_flag", repetition_flag') - st_matrix("unique_flag", unique_flag') - -end - -* Generate vector to multiply the coef vector with to eliminate the repetitions -* of coefficients for vars that satify the proportional odds assumptions -matrix structure_a = J(1,no_nonzero_b_per,1) -matrix structure_b = unique_flag[1,no_nonzero_b_per+1..no_nonzero_b] -matrix structure = structure_a, structure_b - -* Inspect -matrix list structure_a -matrix list structure_b -matrix list structure -matrix list nonzero_b - -* Eliminate repetitions -mata: - // Call matrices into mata - var = st_matrix("var") - structure = st_matrix("structure") - nonzero_b = st_matrix("nonzero_b") - - // Convert reptitions into zeros - b_structure = structure :* nonzero_b - - b_structure - - // Eliminate zeros - keep = (b_structure :!= 0) - - nonzero_b_structure = select(b_structure, keep) - - // Export to Stata - st_matrix("b_structure", b_structure) - st_matrix("nonzero_b_structure", nonzero_b_structure) - -end - -matrix list nonzero_b_structure - -* Export into Excel -putexcel set "$dir_results/reg_socialcare", sheet("S2c") modify -putexcel A1 = matrix(nonzero_b_structure'), names //nformat(number_d2) - - -* Variance-covariance matrix -* Eliminate zeros (baseline categories) -mata: - V = st_matrix("V") - b = st_matrix("b") - - // Find which coefficients are nonzero - keep = (b :!= 0) + if ${s2c_if_condition} [pw=${weight}], vce(r) base(2) - // Eliminate zeros - V_trimmed = select(V, keep) - V_trimmed = select(V_trimmed', keep)' - - V_trimmed +process_gologit, domain("socialcare") process("S2c") sheet("S2c") /// + title("Process S2c: Formal vs Informal") /// + gofrow(11) goflabel("S2c - Formal vs Informal") /// + outcomes(3) /// + ifcond("${s2c_if_condition}") - // Return to 
Stata - st_matrix("var", V_trimmed) -end - -matrix list var - - -* Address repetition due to proportional odds being satisfied for some covars -matrix square_structure_a = J(no_nonzero_b,1,1) * structure -matrix square_structure_b = square_structure_a' - -matrix list square_structure_a -matrix list square_structure_b -mata: - // Call matrices into mata - var = st_matrix("var") - - // Create structure matrix (0 = eliminate) - square_structure_a = st_matrix("square_structure_a") - square_structure_b = st_matrix("square_structure_b") - - // Element-by-element multiplication - square_structure = square_structure_a :* square_structure_b - var_structure = square_structure :* var - - // Eliminate zeros - row_keep = rowsum(abs(var_structure)) :!= 0 - col_keep = colsum(abs(var_structure)) :!= 0 - - nonzero_var_structure = select(select(var_structure, row_keep), col_keep) - - // Return to Stata - st_matrix("nonzero_var_structure", nonzero_var_structure) -end - -matrix list nonzero_var_structure - -* Export to Excel -putexcel set "$dir_results/reg_socialcare", sheet("S2c") modify -putexcel C2 = matrix(nonzero_var_structure) - -*======================================================================= -* Eigenvalue stability check for trimmed variance-covariance matrix - -matrix symeigen X lambda = nonzero_var_structure - -* Largest eigenvalue -scalar max_eig = lambda[1,1] - -* Ratio of smallest to largest eigenvalue -scalar min_ratio = lambda[1, colsof(lambda)] / max_eig - -* Check 1: near-singularity -if max_eig < 1.0e-12 { - display as error "CRITICAL ERROR: Variance-covariance matrix is near singular." - display as error "Max eigenvalue = " max_eig - exit 999 -} - -* Check 2: ill-conditioning -if min_ratio < 1.0e-12 { - display as error "Matrix is ill-conditioned." - display as error "Min/Max eigenvalue ratio = " min_ratio - exit 506 -} - -display "VCV stability check passed." 
-display "Max eigenvalue: " max_eig -display "Min/Max ratio: " min_ratio -*======================================================================= - -* Labels -preserve - -putexcel set "$dir_results/reg_socialcare", sheet("S2c") modify - -putexcel A1 = "REGRESSOR" -putexcel B1 = "COEFFICIENT" - - - * Use Mata to extract nice labels from colstripe of e(b) (replacement for Stata 14) -local dir_results "$dir_results" -cap erase "$dir_results/temp_labels.txt" - -* Run Mata block -mata: - // Import matrices from Stata - nonzero_b_flag = st_matrix("nonzero_b_flag")' - unique_flag = st_matrix("unique_flag")' - structure = st_matrix("structure")' - stripe = st_matrixcolstripe("e(b)") - - // Extract variable and category names - catnames = stripe[.,1] - varnames = stripe[.,2] - varnames_no_bl = select(varnames, nonzero_b_flag :== 1) - catnames_no_bl = select(catnames, nonzero_b_flag :== 1) - - // Handle lags - labels_no_bl = regexm(varnames_no_bl, "^L_") :* (regexr(varnames_no_bl, "^L_", "") :+ "_L1") :+ (!regexm(varnames_no_bl, "^L_") :* varnames_no_bl) - - // Add category name when flag is not unique - labels_no_bl = labels_no_bl :+ "_" :+ (catnames_no_bl :* (unique_flag[1::rows(labels_no_bl)] :!= 0)) - - // Clean labels - labels_no_bl = usubinstr(labels_no_bl, "1.", "", 1) - labels_no_bl = regexr(labels_no_bl, "^_cons", "Constant") - - // Filter for structure == 1 - nonzero_labels_structure = select(labels_no_bl, structure[1::rows(labels_no_bl)] :== 1) - - // Add header row - nonzero_labels_structure = "v1"\nonzero_labels_structure - - // Write to temporary file - fh = fopen(st_local("dir_results") + "/temp_labels.txt", "w") - for (i=1; i<=rows(nonzero_labels_structure); i++) { - fput(fh, nonzero_labels_structure[i]) - } - fclose(fh) -end - - * Import cleaned labels into Stata as new dataset - import delimited "$dir_results/temp_labels.txt", clear varnames(1) encoding(utf8) - gen n = _n - - * Export labels to Excel - putexcel set "$dir_results/reg_socialcare", 
sheet("S2c") modify - - * Vertical labels - sum n, meanonly - local N = r(max)+1 - - forvalue i = 2/`N' { - local j = `i' - 1 - putexcel A`i' = v1[`j'] - } - - * Horizontal labels - sum n, meanonly - local N = r(max) + 1 // Adjusted since we're working across columns - - forvalues j = 1/`N' { - local n = `j'+2 // Shift by 2 to start from column C - local col "" - - while `n' > 0 { - local rem = mod(`n' - 1, 26) - local col = char(65 + `rem') + "`col'" - local n = floor((`n' - 1)/26) - } - - putexcel `col'1 = v1[`j'] - } - - *Clean up - cap erase "$dir_results/temp_labels.txt" - -restore - - -* Goodness of fit - -export_gof_probit, row(11) label("Process S2c: Formal/informal care") - -* Clean up -drop in_sample p1 p2 p3 -scalar drop _all -matrix drop _all - /******************** OLS informal care hours received (S2d) ******************/ @@ -534,7 +204,7 @@ reg HrsReceivedInformalIHS HrsReceivedInformalIHS_L1 CareMarketMixed Dgn /// Y2020 Y2021 ${regions} /*${ethnicity} Ethn_White*/ /// if ${s2d_if_condition} [pweight=${weight}], vce(r) -process_regression, process("S2d") sheet("S2d") /// +process_regression, domain("socialcare") process("S2d") sheet("S2d") /// title("Process S2d: Informal care hours received") /// gofrow(15) goflabel("S2d - Hours of informal care received") /// ifcond("${s2d_if_condition}") @@ -567,7 +237,7 @@ reg HrsReceivedFormalIHS HrsReceivedFormalIHS_L1 CareMarketMixed Dgn /// Y2020 Y2021 ${regions} ${ethnicity} /// if ${s2e_if_condition} [pweight=${weight}], vce(r) -process_regression, process("S2e") sheet("S2e") /// +process_regression, domain("socialcare") process("S2e") sheet("S2e") /// title("Process S2e: Formal care hours received") /// gofrow(19) goflabel("S2e - Hours of formal care received") /// ifcond("${s2e_if_condition}") @@ -601,7 +271,7 @@ probit ProvideCare ProvideCare_L1 NeedCare ReceiveCare Dgn /// Y2020 Y2021 ${regions} ${ethnicity} /// if ${s3a_if_condition} [pweight=${weight}], vce(r) -process_regression, process("S3a") 
sheet("S3a") /// +process_regression, domain("socialcare") process("S3a") sheet("S3a") /// title("Process S3a: Prob. provide care, Singles") /// gofrow(23) goflabel("S3a - Provide care, Singles") /// ifcond("${s3a_if_condition}") probit @@ -624,11 +294,10 @@ probit ProvideCare ProvideCare_L1 NeedCare ReceiveCare Dgn /// Y2020 Y2021 ${regions} ${ethnicity} /// if ${s3b_if_condition} [pweight=${weight}], vce(r) -process_regression, process("S3b") sheet("S3b") /// +process_regression, domain("socialcare") process("S3b") sheet("S3b") /// title("Process S3b: Prob. provide care, Partnered") /// gofrow(27) goflabel("S3b - Provide care, Partnered") /// ifcond("${s3b_if_condition}") probit - /******************* OLS care hours provided, Singles (S3c) ******************/ @@ -642,12 +311,12 @@ reg HrsProvidedInformalIHS HrsProvidedInformalIHS_L1 Dgn /// Y2020 Y2021 ${regions} ${ethnicity} /// if ${s3c_if_condition} [pweight=${weight}], vce(r) -process_regression, process("S3c") sheet("S3c") /// +process_regression, domain("socialcare") process("S3c") sheet("S3c") /// title("Process S3c: Informal care hours provided, Singles") /// gofrow(31) goflabel("S3c - Hours of informal care provided, Singles") /// ifcond("${s3c_if_condition}") - * Calculate RMSE +* Calculate RMSE cap drop residuals squared_residuals predict residuals, residuals gen squared_residuals = residuals^2 @@ -663,7 +332,6 @@ putexcel set "$dir_results/reg_RMSE.xlsx", sheet("UK") modify putexcel A11 = ("S3c") B11 = (rmse) restore - /****************** OLS care hours provided, Partnered (S3d) *****************/ @@ -679,7 +347,7 @@ reg HrsProvidedInformalIHS HrsProvidedInformalIHS_L1 Dgn /// Y2020 Y2021 ${regions} ${ethnicity} /// if ${s3d_if_condition} [pweight=${weight}], vce(r) -process_regression, process("S3d") sheet("S3d") /// +process_regression, domain("socialcare") process("S3d") sheet("S3d") /// title("Process S3d: Informal care hours provided, Partnered") /// gofrow(35) goflabel("S3d - Hours of informal 
care provided, Partnered") /// ifcond("${s3d_if_condition}") @@ -701,6 +369,5 @@ putexcel A12 = ("S3d") B12 = (rmse) restore - -display "Analysis complete!" +display "Social care analysis complete!" diff --git a/input/InitialPopulations/compile/RegressionEstimates/reg_wages.do b/input/InitialPopulations/compile/RegressionEstimates/reg_wages.do index 41cf4ffb7..0e0246f45 100644 --- a/input/InitialPopulations/compile/RegressionEstimates/reg_wages.do +++ b/input/InitialPopulations/compile/RegressionEstimates/reg_wages.do @@ -429,7 +429,7 @@ restore * Labelling putexcel set "$dir_results/reg_employment_selection", sheet("W1fa-sel") modify -local var_list Les_c3_Student_L1 Les_c3_NotEmployed_L1 Dag Dag_sq Deh_c4_Medium Deh_c4_Low Deh_c4_Medium_Dag /// +local var_list Les_c3_Student_L1 Les_c3_NotEmployed_L1 Dag Dag_sq Deh_c4_Na Deh_c4_Medium Deh_c4_Low Deh_c4_Na_Dag Deh_c4_Medium_Dag /// Deh_c4_Low_Dag Dehmf_c3_Medium Dehmf_c3_Low Dcpst_Partnered D_Children Dlltsd01 Dhe_pcs_L1 Dhe_mcs_L1 /// UKC UKD UKE UKF UKG UKH UKJ UKK UKL UKM UKN Y2020 Y2021 /// Ethn_Asian Ethn_Black Ethn_Other Constant @@ -1013,7 +1013,7 @@ restore * Labelling putexcel set "$dir_results/reg_employment_selection", sheet("W1fb-sel") modify -local var_list Dag Dag_sq Deh_c4_Medium Deh_c4_Low Deh_c4_Medium_Dag /// +local var_list Dag Dag_sq Deh_c4_High Deh_c4_Medium Deh_c4_Low Deh_c4_Na Deh_c4_Medium_Dag /// Deh_c4_Low_Dag Dehmf_c3_Medium Dehmf_c3_Low Dcpst_Partnered D_Children Dlltsd01 Dhe_pcs_L1 Dhe_mcs_L1 /// UKC UKD UKE UKF UKG UKH UKJ UKK UKL UKM UKN Y2020 Y2021 /// Ethn_Asian Ethn_Black Ethn_Other Constant diff --git a/input/InitialPopulations/compile/RegressionEstimates/variable_update.do b/input/InitialPopulations/compile/RegressionEstimates/variable_update.do index 24551baaa..953f629a3 100644 --- a/input/InitialPopulations/compile/RegressionEstimates/variable_update.do +++ b/input/InitialPopulations/compile/RegressionEstimates/variable_update.do @@ -387,8 +387,8 @@ replace dage10prime = 1 if 
(dag>34 & dag<45) replace dage10prime = 2 if (dag>44 & dag<55) replace dage10prime = 3 if (dag>54 & dag<65) replace dage10prime = 4 if (dag>64) -//table dage10prime, stat(min dag) stat(max dag) -table dage10prime, c(min dag max dag) +table dage10prime, stat(min dag) stat(max dag) +//table dage10prime, c(min dag max dag) * - Categorical: 65-66, 67-68, 69-70, 71-72..., 85+ gen dage2old = 0 @@ -396,8 +396,8 @@ forval ii = 1/10 { replace dage2old = `ii' if (dag >= 65+2*(`ii'-1) & dag < 67+2*(`ii'-1)) } replace dage2old = 11 if (dag >= 85) -//table dage2old, stat(min dag) stat(max dag) -table dage2old, c(min dag max dag) +table dage2old, stat(min dag) stat(max dag) +//table dage2old, c(min dag max dag) * Poor health flag gen poor_health = (dhe == 1) @@ -412,7 +412,7 @@ foreach var of varlist formal_socare_hrs partner_socare_hrs daughter_socare_hrs xtset idperson stm tab dage5, gen(Age_) -//table dage5, stat(min dag) stat(max dag) // RMK: AgeXX categories start at 1, hence shifted by 1 +table dage5, stat(min dag) stat(max dag) // RMK: AgeXX categories start at 1, hence shifted by 1 tabstat dag, by(dage5) stats(min max) drop Age_1 Age_2 @@ -432,8 +432,8 @@ cap rename Age_15 Age80to84 cap rename Age_16 Age85plus tab dage10prime, gen(Age_) -//table dage10prime, stat(min dag) stat(max dag) // RMK: AgeXX categories start at 1, hence shifted by 1 -table dage10prime, c(min dag max dag) +table dage10prime, stat(min dag) stat(max dag) // RMK: AgeXX categories start at 1, hence shifted by 1 +//table dage10prime, c(min dag max dag) drop Age_1 rename Age_2 Age35to44 rename Age_3 Age45to54 @@ -441,8 +441,8 @@ rename Age_4 Age55to64 rename Age_5 Age65plus tab dage2old, gen(Age_) -//table dage2old, stat(min dag) stat(max dag) // RMK: AgeXX categories start at 1, hence shifted by 1 -table dage2old, c(min dag max dag) +table dage2old, stat(min dag) stat(max dag) // RMK: AgeXX categories start at 1, hence shifted by 1 +//table dage2old, c(min dag max dag) drop Age_1 rename Age_2 Age65to66 
rename Age_3 Age67to68 From 0f6761ace08fbb10ec4ce5331a59efb3742f7439 Mon Sep 17 00:00:00 2001 From: Ashley Burdett <76621383+ANBurdett@users.noreply.github.com> Date: Thu, 16 Apr 2026 17:45:39 +0100 Subject: [PATCH 2/2] Delete .DS_Store --- .DS_Store | Bin 10244 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 .DS_Store diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index 1a9d76103f7b49dead6334eac2fb04f54716e0eb..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 10244 zcmeHMU2GIp6uxI#+8H~@6kC4XVPSGI&%JlH*)9~HAc%95d(WJ6 z&$)Nc+4IfWdzTOb%^Cd>Lc)X)5j&-djKfVDou~bVCis&1NCE8$G0BMTCak#QST|?~ zf*=Ax1cC?z5eOm>MBr|S0M=~U#4=8UF^E7Afgl1i5uopfFm_5CeLBr4dv)NzjsQrD zQOp`X<2iuYgnioR(`inbDXz&+58#`Ef5ZT8j`LxvjxG@6s>|R0=#3430lz)DwI4dSPP9%sMA9GW2%Za1xvv|rfT`wBF z1VT~qypmF}R4fw@bRBlax;)LxxXoI6fO6e<2@uacYw`W2F*dAJv^ch>nYNK;2BvPH z%3!Z)>&{rSld*M|*>>>>Q4*zLWy$1ZZB2c3xVC=NRCRcAb4|25TvxkkYDy9-BOAAL z4v$(%+d08~1ONR%sAKuM#Blqy z@or<>qtDbZk#;O=pW~XI>DYZSZ?36Wzw4yE7PfB8#I5)O3+Z+pDQfKQH_|RrN%~;H zlJM$v57#zqZQ8MWZ|6m&eBlEXm9pHAv9&$(kZGiQ#!THi+^4w-!`4lEq&I82X52Em zZ7pe(L$xGbRF*DV{$NN|=TePj;^tEs)0b@X+Qqtjh<<+Ob^nMdR6XrF%pXAD{hR+a`z}Eh2qkWfTnfzvf5*5al=w{iSM)dF&%DcOP48Xx4K_cE{K>Irkynr2>2avJcyCTDxQdu zTs}t5j`Bwq=tyIfa}~LHpVFGRg4B>!(oPPNQ8Go2l2hbO@;*67z9i?#1#*S_LavcN z$lp)|B~S_?l*3|J3G1K+>YyGr!Q;>ld!YmNK_3`!2u2_U85o0cn1E;D7#xQaa1vgG zSKu{x9o~R<;6wNbK88==bNCv*fp6h^xC%eOkMI**gFoRfxGo4ng-|Ih5>^Q7geqaZ zP$$$2j|vS!o6s)w2t&e%Uv z@&b;vMT?iLT(dq}zj<5pOiuD7&gE@B8W;p-0f#|=rAR^gUgV&Z!#ThfqO%9FV~mRw z!b&OEj>xM;#EvqLvuoul5mBR*aCSpzqlh?F1jfoWp*jiCs1$K_L#RO#72wPkd8>rD zREjyP%1x4_5XOXBLpwyomLlH1q`yS2lApXy{YiKpUVxY26ub$a2n3Q zSvUvh;VT~kmk|KJ!tVu1y|*B#XVuJXSK785djxf;T*}o_5@zK#MJDpA(0qS7Lz2Y6 zTI>-GyGiSEPK~Z|3$=p%=Ap4oQY1sXyy|@>2d_I;y%$eT(&Am$5WPk5KF$Lm4r(=fVgK6?)VsY_=cRQ)_2l*F9v_Rie_?w6zkTli z|L^fA1nvb9_P>UXOmJY=@krTqJTmk;el2#Y s>tAoM=SE*P%_%cS9cGS{-{#-;KLc*>&0BE(56=I@E%-%ox1In00kP7*Jpcdz