Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions TODO.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ Deferred items from PR reviews that were not addressed before merge.
| Multi-absorb weighted demeaning needs iterative alternating projections for N > 1 absorbed FE with survey weights; unweighted multi-absorb also uses single-pass (pre-existing, exact only for balanced panels) | `estimators.py` | #218 | Medium |
| CallawaySantAnna survey: strata/PSU/FPC rejected at runtime. Full design-based SEs require routing the combined IF/WIF through `compute_survey_vcov()`. Currently weights-only. | `staggered.py` | #233 | Medium |
| CallawaySantAnna survey + covariates + IPW/DR: DRDID panel nuisance-estimation IF corrections not implemented. Currently gated with NotImplementedError. Regression method with covariates works (has WLS nuisance IF correction). | `staggered.py` | #233 | Medium |
| SyntheticDiD/TROP survey: strata/PSU/FPC deferred. Full design-based bootstrap (Rao-Wu rescaled weights) needed for survey-aware resampling. Currently pweight-only. | `synthetic_did.py`, `trop.py` | — | Medium |
| EfficientDiD hausman_pretest() clustered covariance uses stale `n_cl` after filtering non-finite EIF rows — should recompute effective cluster count and remap indices after `row_finite` filtering | `efficient_did.py` | #230 | Medium |
| EfficientDiD `control_group="last_cohort"` trims at `last_g - anticipation` but REGISTRY says `t >= last_g`. With `anticipation=0` (default) these are identical. With `anticipation>0`, code is arguably more conservative (excludes anticipation-contaminated periods). Either align REGISTRY with code or change code to `t < last_g` — needs design decision. | `efficient_did.py` | #230 | Low |
| TripleDifference power: `generate_ddd_data` is a fixed 2×2×2 cross-sectional DGP — no multi-period or unbalanced-group support. Add a `generate_ddd_panel_data` for panel DDD power analysis. | `prep_dgp.py`, `power.py` | #208 | Low |
Expand Down
33 changes: 33 additions & 0 deletions diff_diff/results.py
Original file line number Diff line number Diff line change
Expand Up @@ -680,6 +680,8 @@ class SyntheticDiDResults:
pre_treatment_fit: Optional[float] = field(default=None)
placebo_effects: Optional[np.ndarray] = field(default=None)
n_bootstrap: Optional[int] = field(default=None)
# Survey design metadata (SurveyMetadata instance from diff_diff.survey)
survey_metadata: Optional[Any] = field(default=None)

def __repr__(self) -> str:
"""Concise string representation."""
Expand Down Expand Up @@ -735,6 +737,28 @@ def summary(self, alpha: Optional[float] = None) -> str:
if self.variance_method == "bootstrap" and self.n_bootstrap is not None:
lines.append(f"{'Bootstrap replications:':<25} {self.n_bootstrap:>10}")

# Add survey design info
if self.survey_metadata is not None:
sm = self.survey_metadata
lines.extend(
[
"",
"-" * 75,
"Survey Design".center(75),
"-" * 75,
f"{'Weight type:':<25} {sm.weight_type:>10}",
]
)
if sm.n_strata is not None:
lines.append(f"{'Strata:':<25} {sm.n_strata:>10}")
if sm.n_psu is not None:
lines.append(f"{'PSU/Cluster:':<25} {sm.n_psu:>10}")
lines.append(f"{'Effective sample size:':<25} {sm.effective_n:>10.1f}")
lines.append(f"{'Design effect (DEFF):':<25} {sm.design_effect:>10.2f}")
if sm.df_survey is not None:
lines.append(f"{'Survey d.f.:':<25} {sm.df_survey:>10}")
lines.append("-" * 75)

lines.extend(
[
"",
Expand Down Expand Up @@ -812,6 +836,15 @@ def to_dict(self) -> Dict[str, Any]:
}
if self.n_bootstrap is not None:
result["n_bootstrap"] = self.n_bootstrap
if self.survey_metadata is not None:
sm = self.survey_metadata
result["weight_type"] = sm.weight_type
result["effective_n"] = sm.effective_n
result["design_effect"] = sm.design_effect
result["sum_weights"] = sm.sum_weights
result["n_strata"] = sm.n_strata
result["n_psu"] = sm.n_psu
result["df_survey"] = sm.df_survey
return result

def to_dataframe(self) -> pd.DataFrame:
Expand Down
60 changes: 60 additions & 0 deletions diff_diff/survey.py
Original file line number Diff line number Diff line change
Expand Up @@ -430,6 +430,66 @@ def _validate_unit_constant_survey(data, unit_col, survey_design):
)


def _resolve_pweight_only(resolved_survey, estimator_name):
    """Validate a survey design for an estimator that only supports pweights.

    The check is a pure guard: it returns ``None`` when the design is
    acceptable (or absent) and raises otherwise. Nothing is modified.

    Parameters
    ----------
    resolved_survey : ResolvedSurveyDesign or None
        Resolved survey design to validate. ``None`` means no survey design
        was supplied, in which case there is nothing to check.
    estimator_name : str
        Name of the calling estimator, interpolated into error messages.

    Raises
    ------
    ValueError
        If ``resolved_survey.weight_type`` is anything other than
        ``'pweight'``.
    NotImplementedError
        If any of ``strata``, ``psu`` or ``fpc`` is set on the design.
    """
    # No survey design at all: the estimator runs unweighted.
    if resolved_survey is None:
        return

    weight_type = resolved_survey.weight_type
    if weight_type != "pweight":
        message = (
            f"{estimator_name} survey support requires weight_type='pweight'. "
            f"Got '{weight_type}'."
        )
        raise ValueError(message)

    # Attributes are inspected lazily in this order, stopping at the first
    # one that is set.
    has_design_structure = any(
        getattr(resolved_survey, attr) is not None
        for attr in ("strata", "psu", "fpc")
    )
    if not has_design_structure:
        return

    raise NotImplementedError(
        f"{estimator_name} does not yet support strata/PSU/FPC in "
        "SurveyDesign. Use SurveyDesign(weights=...) only. Full "
        "design-based bootstrap is planned for the Bootstrap + "
        "Survey Interaction phase."
    )


def _extract_unit_survey_weights(data, unit_col, survey_design, unit_order):
    """Extract unit-level survey weights aligned to a given unit ordering.

    Parameters
    ----------
    data : pd.DataFrame
        Panel data with survey weight column.
    unit_col : str
        Unit identifier column name.
    survey_design : SurveyDesign
        Survey design (uses ``weights`` column name).
    unit_order : array-like
        Ordered sequence of unit identifiers to align weights to. Repeated
        identifiers are allowed and yield repeated weights.

    Returns
    -------
    np.ndarray
        Float64 array of unit-level weights, one per unit in ``unit_order``.

    Raises
    ------
    KeyError
        If any identifier in ``unit_order`` does not occur in ``unit_col``.
    """
    # Collapse the panel to one weight per unit. ``first()`` takes the first
    # non-null weight within each unit's rows.
    unit_w = data.groupby(unit_col)[survey_design.weights].first()

    # Look all units up in one strictly label-based ``.loc`` call. Plain
    # ``unit_w[u]`` is ambiguous: an integer key against a non-integer unit
    # index falls back to positional access, which can silently return the
    # weight of a different unit. ``.loc`` raises KeyError instead.
    aligned = unit_w.loc[list(unit_order)]

    # ``np.array`` copies, so the caller always gets a fresh, writable array.
    return np.array(aligned, dtype=np.float64)


def _resolve_survey_for_fit(survey_design, data, inference_mode="analytical"):
"""
Shared helper: validate and resolve a SurveyDesign for an estimator fit() call.
Expand Down
Loading
Loading