diff --git a/TODO.md b/TODO.md
index 96e176a..98c3c89 100644
--- a/TODO.md
+++ b/TODO.md
@@ -62,6 +62,7 @@ Deferred items from PR reviews that were not addressed before merge.
 | Tutorial notebooks not executed in CI | `docs/tutorials/*.ipynb` | #159 | Low |
 | R comparison tests spawn separate `Rscript` per test (slow CI) | `tests/test_methodology_twfe.py:294` | #139 | Low |
 | CS R helpers hard-code `xformla = ~ 1`; no covariate-adjusted R benchmark for IRLS path | `tests/test_methodology_callaway.py` | #202 | Low |
+| Context-dependent doc snippets pass only because `NameError` is blanket-tolerated; they are never validated standalone | `tests/test_doc_snippets.py`, `docs/api/visualization.rst`, `docs/python_comparison.rst`, `docs/r_comparison.rst` | #206 | Low |
 
 ---
 
diff --git a/diff_diff/datasets.py b/diff_diff/datasets.py
index d676a5c..f6170cb 100644
--- a/diff_diff/datasets.py
+++ b/diff_diff/datasets.py
@@ -266,7 +266,7 @@ def load_castle_doctrine(force_download: bool = False) -> pd.DataFrame:
     ...     outcome="homicide_rate",
     ...     unit="state",
     ...     time="year",
-    ...     cohort="first_treat"
+    ...     first_treat="first_treat"
     ... )
     """
     url = "https://raw.githubusercontent.com/causaldata/causal_datasets/main/castle/castle.csv"
@@ -412,7 +412,7 @@ def load_divorce_laws(force_download: bool = False) -> pd.DataFrame:
     ...     outcome="divorce_rate",
     ...     unit="state",
     ...     time="year",
-    ...     cohort="first_treat"
+    ...     first_treat="first_treat"
     ... )
     """
     # Try to load from causaldata repository
@@ -571,7 +571,7 @@ def load_mpdta(force_download: bool = False) -> pd.DataFrame:
     ...     outcome="lemp",
     ...     unit="countyreal",
     ...     time="year",
-    ...     cohort="first_treat"
+    ...     first_treat="first_treat"
     ... )
     """
     # mpdta is available from the did package documentation
diff --git a/docs/api/_autosummary/diff_diff.BaconDecomposition.rst b/docs/api/_autosummary/diff_diff.BaconDecomposition.rst
new file mode 100644
index 0000000..6086f86
--- /dev/null
+++ b/docs/api/_autosummary/diff_diff.BaconDecomposition.rst
@@ -0,0 +1,27 @@
+diff\_diff.BaconDecomposition
+=============================
+
+.. currentmodule:: diff_diff
+
+.. autoclass:: BaconDecomposition
+
+   
+   .. automethod:: __init__
+
+   
+   .. rubric:: Methods
+
+   .. autosummary::
+   
+      ~BaconDecomposition.__init__
+      ~BaconDecomposition.fit
+      ~BaconDecomposition.get_params
+      ~BaconDecomposition.print_summary
+      ~BaconDecomposition.set_params
+      ~BaconDecomposition.summary
+   
+   
+
+   
+   
+   
\ No newline at end of file
diff --git a/docs/api/_autosummary/diff_diff.BaconDecompositionResults.rst b/docs/api/_autosummary/diff_diff.BaconDecompositionResults.rst
new file mode 100644
index 0000000..a9efaff
--- /dev/null
+++ b/docs/api/_autosummary/diff_diff.BaconDecompositionResults.rst
@@ -0,0 +1,45 @@
+diff\_diff.BaconDecompositionResults
+====================================
+
+.. currentmodule:: diff_diff
+
+.. autoclass:: BaconDecompositionResults
+
+   
+   .. automethod:: __init__
+
+   
+   .. rubric:: Methods
+
+   .. autosummary::
+   
+      ~BaconDecompositionResults.__init__
+      ~BaconDecompositionResults.effect_by_type
+      ~BaconDecompositionResults.print_summary
+      ~BaconDecompositionResults.summary
+      ~BaconDecompositionResults.to_dataframe
+      ~BaconDecompositionResults.weight_by_type
+   
+   
+
+   
+   
+   .. rubric:: Attributes
+
+   .. autosummary::
+   
+      ~BaconDecompositionResults.decomposition_error
+      ~BaconDecompositionResults.n_obs
+      ~BaconDecompositionResults.twfe_estimate
+      ~BaconDecompositionResults.comparisons
+      ~BaconDecompositionResults.total_weight_treated_vs_never
+      ~BaconDecompositionResults.total_weight_earlier_vs_later
+      ~BaconDecompositionResults.total_weight_later_vs_earlier
+      ~BaconDecompositionResults.weighted_avg_treated_vs_never
+      ~BaconDecompositionResults.weighted_avg_earlier_vs_later
+      ~BaconDecompositionResults.weighted_avg_later_vs_earlier
+      ~BaconDecompositionResults.n_timing_groups
+      ~BaconDecompositionResults.n_never_treated
+      ~BaconDecompositionResults.timing_groups
+   
+   
\ No newline at end of file
diff --git a/docs/api/_autosummary/diff_diff.CSBootstrapResults.rst b/docs/api/_autosummary/diff_diff.CSBootstrapResults.rst
new file mode 100644
index 0000000..88bdda6
--- /dev/null
+++ b/docs/api/_autosummary/diff_diff.CSBootstrapResults.rst
@@ -0,0 +1,44 @@
+diff\_diff.CSBootstrapResults
+=============================
+
+.. currentmodule:: diff_diff
+
+.. autoclass:: CSBootstrapResults
+
+   
+   .. automethod:: __init__
+
+   
+   .. rubric:: Methods
+
+   .. autosummary::
+   
+      ~CSBootstrapResults.__init__
+   
+   
+
+   
+   
+   .. rubric:: Attributes
+
+   .. autosummary::
+   
+      ~CSBootstrapResults.bootstrap_distribution
+      ~CSBootstrapResults.cband_crit_value
+      ~CSBootstrapResults.event_study_cis
+      ~CSBootstrapResults.event_study_p_values
+      ~CSBootstrapResults.event_study_ses
+      ~CSBootstrapResults.group_effect_cis
+      ~CSBootstrapResults.group_effect_p_values
+      ~CSBootstrapResults.group_effect_ses
+      ~CSBootstrapResults.n_bootstrap
+      ~CSBootstrapResults.weight_type
+      ~CSBootstrapResults.alpha
+      ~CSBootstrapResults.overall_att_se
+      ~CSBootstrapResults.overall_att_ci
+      ~CSBootstrapResults.overall_att_p_value
+      ~CSBootstrapResults.group_time_ses
+      ~CSBootstrapResults.group_time_cis
+      ~CSBootstrapResults.group_time_p_values
+   
+   
\ No newline at end of file
diff --git a/docs/api/_autosummary/diff_diff.CallawaySantAnna.rst b/docs/api/_autosummary/diff_diff.CallawaySantAnna.rst
index 336b41c..cf2dc47 100644
--- a/docs/api/_autosummary/diff_diff.CallawaySantAnna.rst
+++ b/docs/api/_autosummary/diff_diff.CallawaySantAnna.rst
@@ -24,4 +24,15 @@
 
    
    
+   .. rubric:: Attributes
+
+   .. autosummary::
+   
+      ~CallawaySantAnna.n_bootstrap
+      ~CallawaySantAnna.bootstrap_weight_type
+      ~CallawaySantAnna.alpha
+      ~CallawaySantAnna.seed
+      ~CallawaySantAnna.anticipation
+      ~CallawaySantAnna.base_period
+   
    
\ No newline at end of file
diff --git a/docs/api/_autosummary/diff_diff.CallawaySantAnnaResults.rst b/docs/api/_autosummary/diff_diff.CallawaySantAnnaResults.rst
index 4037a73..dc2f10f 100644
--- a/docs/api/_autosummary/diff_diff.CallawaySantAnnaResults.rst
+++ b/docs/api/_autosummary/diff_diff.CallawaySantAnnaResults.rst
@@ -27,11 +27,15 @@
    .. autosummary::
    
       ~CallawaySantAnnaResults.alpha
+      ~CallawaySantAnnaResults.base_period
+      ~CallawaySantAnnaResults.bootstrap_results
+      ~CallawaySantAnnaResults.cband_crit_value
       ~CallawaySantAnnaResults.control_group
       ~CallawaySantAnnaResults.event_study_effects
       ~CallawaySantAnnaResults.group_effects
       ~CallawaySantAnnaResults.influence_functions
       ~CallawaySantAnnaResults.is_significant
+      ~CallawaySantAnnaResults.pscore_trim
       ~CallawaySantAnnaResults.significance_stars
       ~CallawaySantAnnaResults.group_time_effects
       ~CallawaySantAnnaResults.overall_att
diff --git a/docs/api/_autosummary/diff_diff.Comparison2x2.rst b/docs/api/_autosummary/diff_diff.Comparison2x2.rst
new file mode 100644
index 0000000..718f5e8
--- /dev/null
+++ b/docs/api/_autosummary/diff_diff.Comparison2x2.rst
@@ -0,0 +1,35 @@
+diff\_diff.Comparison2x2
+========================
+
+.. currentmodule:: diff_diff
+
+.. autoclass:: Comparison2x2
+
+   
+   .. automethod:: __init__
+
+   
+   .. rubric:: Methods
+
+   .. autosummary::
+   
+      ~Comparison2x2.__init__
+   
+   
+
+   
+   
+   .. rubric:: Attributes
+
+   .. autosummary::
+   
+      ~Comparison2x2.treated_group
+      ~Comparison2x2.control_group
+      ~Comparison2x2.comparison_type
+      ~Comparison2x2.estimate
+      ~Comparison2x2.weight
+      ~Comparison2x2.n_treated
+      ~Comparison2x2.n_control
+      ~Comparison2x2.time_window
+   
+   
\ No newline at end of file
diff --git a/docs/api/_autosummary/diff_diff.ContinuousDiD.rst b/docs/api/_autosummary/diff_diff.ContinuousDiD.rst
new file mode 100644
index 0000000..4dd604b
--- /dev/null
+++ b/docs/api/_autosummary/diff_diff.ContinuousDiD.rst
@@ -0,0 +1,25 @@
+diff\_diff.ContinuousDiD
+========================
+
+.. currentmodule:: diff_diff
+
+.. autoclass:: ContinuousDiD
+
+   
+   .. automethod:: __init__
+
+   
+   .. rubric:: Methods
+
+   .. autosummary::
+   
+      ~ContinuousDiD.__init__
+      ~ContinuousDiD.fit
+      ~ContinuousDiD.get_params
+      ~ContinuousDiD.set_params
+   
+   
+
+   
+   
+   
\ No newline at end of file
diff --git a/docs/api/_autosummary/diff_diff.ContinuousDiDResults.rst b/docs/api/_autosummary/diff_diff.ContinuousDiDResults.rst
new file mode 100644
index 0000000..ad1df6e
--- /dev/null
+++ b/docs/api/_autosummary/diff_diff.ContinuousDiDResults.rst
@@ -0,0 +1,62 @@
+diff\_diff.ContinuousDiDResults
+===============================
+
+.. currentmodule:: diff_diff
+
+.. autoclass:: ContinuousDiDResults
+
+   
+   .. automethod:: __init__
+
+   
+   .. rubric:: Methods
+
+   .. autosummary::
+   
+      ~ContinuousDiDResults.__init__
+      ~ContinuousDiDResults.print_summary
+      ~ContinuousDiDResults.summary
+      ~ContinuousDiDResults.to_dataframe
+   
+   
+
+   
+   
+   .. rubric:: Attributes
+
+   .. autosummary::
+   
+      ~ContinuousDiDResults.alpha
+      ~ContinuousDiDResults.anticipation
+      ~ContinuousDiDResults.base_period
+      ~ContinuousDiDResults.bootstrap_weights
+      ~ContinuousDiDResults.control_group
+      ~ContinuousDiDResults.degree
+      ~ContinuousDiDResults.event_study_effects
+      ~ContinuousDiDResults.is_significant
+      ~ContinuousDiDResults.n_bootstrap
+      ~ContinuousDiDResults.num_knots
+      ~ContinuousDiDResults.rank_deficient_action
+      ~ContinuousDiDResults.seed
+      ~ContinuousDiDResults.significance_stars
+      ~ContinuousDiDResults.dose_response_att
+      ~ContinuousDiDResults.dose_response_acrt
+      ~ContinuousDiDResults.overall_att
+      ~ContinuousDiDResults.overall_att_se
+      ~ContinuousDiDResults.overall_att_t_stat
+      ~ContinuousDiDResults.overall_att_p_value
+      ~ContinuousDiDResults.overall_att_conf_int
+      ~ContinuousDiDResults.overall_acrt
+      ~ContinuousDiDResults.overall_acrt_se
+      ~ContinuousDiDResults.overall_acrt_t_stat
+      ~ContinuousDiDResults.overall_acrt_p_value
+      ~ContinuousDiDResults.overall_acrt_conf_int
+      ~ContinuousDiDResults.group_time_effects
+      ~ContinuousDiDResults.dose_grid
+      ~ContinuousDiDResults.groups
+      ~ContinuousDiDResults.time_periods
+      ~ContinuousDiDResults.n_obs
+      ~ContinuousDiDResults.n_treated_units
+      ~ContinuousDiDResults.n_control_units
+   
+   
\ No newline at end of file
diff --git a/docs/api/_autosummary/diff_diff.DoseResponseCurve.rst b/docs/api/_autosummary/diff_diff.DoseResponseCurve.rst
new file mode 100644
index 0000000..cbafdac
--- /dev/null
+++ b/docs/api/_autosummary/diff_diff.DoseResponseCurve.rst
@@ -0,0 +1,36 @@
+diff\_diff.DoseResponseCurve
+============================
+
+.. currentmodule:: diff_diff
+
+.. autoclass:: DoseResponseCurve
+
+   
+   .. automethod:: __init__
+
+   
+   .. rubric:: Methods
+
+   .. autosummary::
+   
+      ~DoseResponseCurve.__init__
+      ~DoseResponseCurve.to_dataframe
+   
+   
+
+   
+   
+   .. rubric:: Attributes
+
+   .. autosummary::
+   
+      ~DoseResponseCurve.n_bootstrap
+      ~DoseResponseCurve.p_value
+      ~DoseResponseCurve.dose_grid
+      ~DoseResponseCurve.effects
+      ~DoseResponseCurve.se
+      ~DoseResponseCurve.conf_int_lower
+      ~DoseResponseCurve.conf_int_upper
+      ~DoseResponseCurve.target
+   
+   
\ No newline at end of file
diff --git a/docs/api/_autosummary/diff_diff.EDiDBootstrapResults.rst b/docs/api/_autosummary/diff_diff.EDiDBootstrapResults.rst
new file mode 100644
index 0000000..bbd47e3
--- /dev/null
+++ b/docs/api/_autosummary/diff_diff.EDiDBootstrapResults.rst
@@ -0,0 +1,43 @@
+diff\_diff.EDiDBootstrapResults
+===============================
+
+.. currentmodule:: diff_diff
+
+.. autoclass:: EDiDBootstrapResults
+
+   
+   .. automethod:: __init__
+
+   
+   .. rubric:: Methods
+
+   .. autosummary::
+   
+      ~EDiDBootstrapResults.__init__
+   
+   
+
+   
+   
+   .. rubric:: Attributes
+
+   .. autosummary::
+   
+      ~EDiDBootstrapResults.bootstrap_distribution
+      ~EDiDBootstrapResults.event_study_cis
+      ~EDiDBootstrapResults.event_study_p_values
+      ~EDiDBootstrapResults.event_study_ses
+      ~EDiDBootstrapResults.group_effect_cis
+      ~EDiDBootstrapResults.group_effect_p_values
+      ~EDiDBootstrapResults.group_effect_ses
+      ~EDiDBootstrapResults.n_bootstrap
+      ~EDiDBootstrapResults.weight_type
+      ~EDiDBootstrapResults.alpha
+      ~EDiDBootstrapResults.overall_att_se
+      ~EDiDBootstrapResults.overall_att_ci
+      ~EDiDBootstrapResults.overall_att_p_value
+      ~EDiDBootstrapResults.group_time_ses
+      ~EDiDBootstrapResults.group_time_cis
+      ~EDiDBootstrapResults.group_time_p_values
+   
+   
\ No newline at end of file
diff --git a/docs/api/_autosummary/diff_diff.EfficientDiD.rst b/docs/api/_autosummary/diff_diff.EfficientDiD.rst
new file mode 100644
index 0000000..6a53b57
--- /dev/null
+++ b/docs/api/_autosummary/diff_diff.EfficientDiD.rst
@@ -0,0 +1,37 @@
+diff\_diff.EfficientDiD
+=======================
+
+.. currentmodule:: diff_diff
+
+.. autoclass:: EfficientDiD
+
+   
+   .. automethod:: __init__
+
+   
+   .. rubric:: Methods
+
+   .. autosummary::
+   
+      ~EfficientDiD.__init__
+      ~EfficientDiD.fit
+      ~EfficientDiD.get_params
+      ~EfficientDiD.print_summary
+      ~EfficientDiD.set_params
+      ~EfficientDiD.summary
+   
+   
+
+   
+   
+   .. rubric:: Attributes
+
+   .. autosummary::
+   
+      ~EfficientDiD.n_bootstrap
+      ~EfficientDiD.bootstrap_weights
+      ~EfficientDiD.alpha
+      ~EfficientDiD.seed
+      ~EfficientDiD.anticipation
+   
+   
\ No newline at end of file
diff --git a/docs/api/_autosummary/diff_diff.EfficientDiDResults.rst b/docs/api/_autosummary/diff_diff.EfficientDiDResults.rst
new file mode 100644
index 0000000..58fa230
--- /dev/null
+++ b/docs/api/_autosummary/diff_diff.EfficientDiDResults.rst
@@ -0,0 +1,55 @@
+diff\_diff.EfficientDiDResults
+==============================
+
+.. currentmodule:: diff_diff
+
+.. autoclass:: EfficientDiDResults
+
+   
+   .. automethod:: __init__
+
+   
+   .. rubric:: Methods
+
+   .. autosummary::
+   
+      ~EfficientDiDResults.__init__
+      ~EfficientDiDResults.print_summary
+      ~EfficientDiDResults.summary
+      ~EfficientDiDResults.to_dataframe
+   
+   
+
+   
+   
+   .. rubric:: Attributes
+
+   .. autosummary::
+   
+      ~EfficientDiDResults.alpha
+      ~EfficientDiDResults.anticipation
+      ~EfficientDiDResults.bootstrap_results
+      ~EfficientDiDResults.bootstrap_weights
+      ~EfficientDiDResults.efficient_weights
+      ~EfficientDiDResults.event_study_effects
+      ~EfficientDiDResults.group_effects
+      ~EfficientDiDResults.influence_functions
+      ~EfficientDiDResults.is_significant
+      ~EfficientDiDResults.n_bootstrap
+      ~EfficientDiDResults.omega_condition_numbers
+      ~EfficientDiDResults.pt_assumption
+      ~EfficientDiDResults.seed
+      ~EfficientDiDResults.significance_stars
+      ~EfficientDiDResults.group_time_effects
+      ~EfficientDiDResults.overall_att
+      ~EfficientDiDResults.overall_se
+      ~EfficientDiDResults.overall_t_stat
+      ~EfficientDiDResults.overall_p_value
+      ~EfficientDiDResults.overall_conf_int
+      ~EfficientDiDResults.groups
+      ~EfficientDiDResults.time_periods
+      ~EfficientDiDResults.n_obs
+      ~EfficientDiDResults.n_treated_units
+      ~EfficientDiDResults.n_control_units
+   
+   
\ No newline at end of file
diff --git a/docs/api/_autosummary/diff_diff.ImputationBootstrapResults.rst b/docs/api/_autosummary/diff_diff.ImputationBootstrapResults.rst
new file mode 100644
index 0000000..62d6e57
--- /dev/null
+++ b/docs/api/_autosummary/diff_diff.ImputationBootstrapResults.rst
@@ -0,0 +1,40 @@
+diff\_diff.ImputationBootstrapResults
+=====================================
+
+.. currentmodule:: diff_diff
+
+.. autoclass:: ImputationBootstrapResults
+
+   
+   .. automethod:: __init__
+
+   
+   .. rubric:: Methods
+
+   .. autosummary::
+   
+      ~ImputationBootstrapResults.__init__
+   
+   
+
+   
+   
+   .. rubric:: Attributes
+
+   .. autosummary::
+   
+      ~ImputationBootstrapResults.bootstrap_distribution
+      ~ImputationBootstrapResults.event_study_cis
+      ~ImputationBootstrapResults.event_study_p_values
+      ~ImputationBootstrapResults.event_study_ses
+      ~ImputationBootstrapResults.group_cis
+      ~ImputationBootstrapResults.group_p_values
+      ~ImputationBootstrapResults.group_ses
+      ~ImputationBootstrapResults.n_bootstrap
+      ~ImputationBootstrapResults.weight_type
+      ~ImputationBootstrapResults.alpha
+      ~ImputationBootstrapResults.overall_att_se
+      ~ImputationBootstrapResults.overall_att_ci
+      ~ImputationBootstrapResults.overall_att_p_value
+   
+   
\ No newline at end of file
diff --git a/docs/api/_autosummary/diff_diff.ImputationDiD.rst b/docs/api/_autosummary/diff_diff.ImputationDiD.rst
new file mode 100644
index 0000000..21995ef
--- /dev/null
+++ b/docs/api/_autosummary/diff_diff.ImputationDiD.rst
@@ -0,0 +1,27 @@
+diff\_diff.ImputationDiD
+========================
+
+.. currentmodule:: diff_diff
+
+.. autoclass:: ImputationDiD
+
+   
+   .. automethod:: __init__
+
+   
+   .. rubric:: Methods
+
+   .. autosummary::
+   
+      ~ImputationDiD.__init__
+      ~ImputationDiD.fit
+      ~ImputationDiD.get_params
+      ~ImputationDiD.print_summary
+      ~ImputationDiD.set_params
+      ~ImputationDiD.summary
+   
+   
+
+   
+   
+   
\ No newline at end of file
diff --git a/docs/api/_autosummary/diff_diff.ImputationDiDResults.rst b/docs/api/_autosummary/diff_diff.ImputationDiDResults.rst
new file mode 100644
index 0000000..5295eff
--- /dev/null
+++ b/docs/api/_autosummary/diff_diff.ImputationDiDResults.rst
@@ -0,0 +1,51 @@
+diff\_diff.ImputationDiDResults
+===============================
+
+.. currentmodule:: diff_diff
+
+.. autoclass:: ImputationDiDResults
+
+   
+   .. automethod:: __init__
+
+   
+   .. rubric:: Methods
+
+   .. autosummary::
+   
+      ~ImputationDiDResults.__init__
+      ~ImputationDiDResults.pretrend_test
+      ~ImputationDiDResults.print_summary
+      ~ImputationDiDResults.summary
+      ~ImputationDiDResults.to_dataframe
+   
+   
+
+   
+   
+   .. rubric:: Attributes
+
+   .. autosummary::
+   
+      ~ImputationDiDResults.alpha
+      ~ImputationDiDResults.bootstrap_results
+      ~ImputationDiDResults.is_significant
+      ~ImputationDiDResults.pretrend_results
+      ~ImputationDiDResults.significance_stars
+      ~ImputationDiDResults.treatment_effects
+      ~ImputationDiDResults.overall_att
+      ~ImputationDiDResults.overall_se
+      ~ImputationDiDResults.overall_t_stat
+      ~ImputationDiDResults.overall_p_value
+      ~ImputationDiDResults.overall_conf_int
+      ~ImputationDiDResults.event_study_effects
+      ~ImputationDiDResults.group_effects
+      ~ImputationDiDResults.groups
+      ~ImputationDiDResults.time_periods
+      ~ImputationDiDResults.n_obs
+      ~ImputationDiDResults.n_treated_obs
+      ~ImputationDiDResults.n_untreated_obs
+      ~ImputationDiDResults.n_treated_units
+      ~ImputationDiDResults.n_control_units
+   
+   
\ No newline at end of file
diff --git a/docs/api/_autosummary/diff_diff.MultiPeriodDiDResults.rst b/docs/api/_autosummary/diff_diff.MultiPeriodDiDResults.rst
index 8084ebe..fee778a 100644
--- a/docs/api/_autosummary/diff_diff.MultiPeriodDiDResults.rst
+++ b/docs/api/_autosummary/diff_diff.MultiPeriodDiDResults.rst
@@ -31,8 +31,12 @@
       ~MultiPeriodDiDResults.alpha
       ~MultiPeriodDiDResults.coefficients
       ~MultiPeriodDiDResults.fitted_values
+      ~MultiPeriodDiDResults.interaction_indices
       ~MultiPeriodDiDResults.is_significant
+      ~MultiPeriodDiDResults.post_period_effects
+      ~MultiPeriodDiDResults.pre_period_effects
       ~MultiPeriodDiDResults.r_squared
+      ~MultiPeriodDiDResults.reference_period
       ~MultiPeriodDiDResults.residuals
       ~MultiPeriodDiDResults.significance_stars
       ~MultiPeriodDiDResults.vcov
diff --git a/docs/api/_autosummary/diff_diff.PowerAnalysis.rst b/docs/api/_autosummary/diff_diff.PowerAnalysis.rst
new file mode 100644
index 0000000..ee97ad0
--- /dev/null
+++ b/docs/api/_autosummary/diff_diff.PowerAnalysis.rst
@@ -0,0 +1,27 @@
+diff\_diff.PowerAnalysis
+========================
+
+.. currentmodule:: diff_diff
+
+.. autoclass:: PowerAnalysis
+
+   
+   .. automethod:: __init__
+
+   
+   .. rubric:: Methods
+
+   .. autosummary::
+   
+      ~PowerAnalysis.__init__
+      ~PowerAnalysis.mde
+      ~PowerAnalysis.power
+      ~PowerAnalysis.power_curve
+      ~PowerAnalysis.sample_size
+      ~PowerAnalysis.sample_size_curve
+   
+   
+
+   
+   
+   
\ No newline at end of file
diff --git a/docs/api/_autosummary/diff_diff.PowerResults.rst b/docs/api/_autosummary/diff_diff.PowerResults.rst
new file mode 100644
index 0000000..46f7948
--- /dev/null
+++ b/docs/api/_autosummary/diff_diff.PowerResults.rst
@@ -0,0 +1,44 @@
+diff\_diff.PowerResults
+=======================
+
+.. currentmodule:: diff_diff
+
+.. autoclass:: PowerResults
+
+   
+   .. automethod:: __init__
+
+   
+   .. rubric:: Methods
+
+   .. autosummary::
+   
+      ~PowerResults.__init__
+      ~PowerResults.print_summary
+      ~PowerResults.summary
+      ~PowerResults.to_dataframe
+      ~PowerResults.to_dict
+   
+   
+
+   
+   
+   .. rubric:: Attributes
+
+   .. autosummary::
+   
+      ~PowerResults.design
+      ~PowerResults.rho
+      ~PowerResults.power
+      ~PowerResults.mde
+      ~PowerResults.required_n
+      ~PowerResults.effect_size
+      ~PowerResults.alpha
+      ~PowerResults.alternative
+      ~PowerResults.n_treated
+      ~PowerResults.n_control
+      ~PowerResults.n_pre
+      ~PowerResults.n_post
+      ~PowerResults.sigma
+   
+   
\ No newline at end of file
diff --git a/docs/api/_autosummary/diff_diff.PreTrendsPower.rst b/docs/api/_autosummary/diff_diff.PreTrendsPower.rst
new file mode 100644
index 0000000..c685085
--- /dev/null
+++ b/docs/api/_autosummary/diff_diff.PreTrendsPower.rst
@@ -0,0 +1,28 @@
+diff\_diff.PreTrendsPower
+=========================
+
+.. currentmodule:: diff_diff
+
+.. autoclass:: PreTrendsPower
+
+   
+   .. automethod:: __init__
+
+   
+   .. rubric:: Methods
+
+   .. autosummary::
+   
+      ~PreTrendsPower.__init__
+      ~PreTrendsPower.fit
+      ~PreTrendsPower.get_params
+      ~PreTrendsPower.power_at
+      ~PreTrendsPower.power_curve
+      ~PreTrendsPower.sensitivity_to_honest_did
+      ~PreTrendsPower.set_params
+   
+   
+
+   
+   
+   
\ No newline at end of file
diff --git a/docs/api/_autosummary/diff_diff.PreTrendsPowerCurve.rst b/docs/api/_autosummary/diff_diff.PreTrendsPowerCurve.rst
new file mode 100644
index 0000000..70caeb7
--- /dev/null
+++ b/docs/api/_autosummary/diff_diff.PreTrendsPowerCurve.rst
@@ -0,0 +1,35 @@
+diff\_diff.PreTrendsPowerCurve
+==============================
+
+.. currentmodule:: diff_diff
+
+.. autoclass:: PreTrendsPowerCurve
+
+   
+   .. automethod:: __init__
+
+   
+   .. rubric:: Methods
+
+   .. autosummary::
+   
+      ~PreTrendsPowerCurve.__init__
+      ~PreTrendsPowerCurve.plot
+      ~PreTrendsPowerCurve.to_dataframe
+   
+   
+
+   
+   
+   .. rubric:: Attributes
+
+   .. autosummary::
+   
+      ~PreTrendsPowerCurve.M_values
+      ~PreTrendsPowerCurve.powers
+      ~PreTrendsPowerCurve.mdv
+      ~PreTrendsPowerCurve.alpha
+      ~PreTrendsPowerCurve.target_power
+      ~PreTrendsPowerCurve.violation_type
+   
+   
\ No newline at end of file
diff --git a/docs/api/_autosummary/diff_diff.PreTrendsPowerResults.rst b/docs/api/_autosummary/diff_diff.PreTrendsPowerResults.rst
new file mode 100644
index 0000000..da32052
--- /dev/null
+++ b/docs/api/_autosummary/diff_diff.PreTrendsPowerResults.rst
@@ -0,0 +1,48 @@
+diff\_diff.PreTrendsPowerResults
+================================
+
+.. currentmodule:: diff_diff
+
+.. autoclass:: PreTrendsPowerResults
+
+   
+   .. automethod:: __init__
+
+   
+   .. rubric:: Methods
+
+   .. autosummary::
+   
+      ~PreTrendsPowerResults.__init__
+      ~PreTrendsPowerResults.power_at
+      ~PreTrendsPowerResults.print_summary
+      ~PreTrendsPowerResults.summary
+      ~PreTrendsPowerResults.to_dataframe
+      ~PreTrendsPowerResults.to_dict
+   
+   
+
+   
+   
+   .. rubric:: Attributes
+
+   .. autosummary::
+   
+      ~PreTrendsPowerResults.is_informative
+      ~PreTrendsPowerResults.original_results
+      ~PreTrendsPowerResults.power_adequate
+      ~PreTrendsPowerResults.power
+      ~PreTrendsPowerResults.mdv
+      ~PreTrendsPowerResults.violation_magnitude
+      ~PreTrendsPowerResults.violation_type
+      ~PreTrendsPowerResults.alpha
+      ~PreTrendsPowerResults.target_power
+      ~PreTrendsPowerResults.n_pre_periods
+      ~PreTrendsPowerResults.test_statistic
+      ~PreTrendsPowerResults.critical_value
+      ~PreTrendsPowerResults.noncentrality
+      ~PreTrendsPowerResults.pre_period_effects
+      ~PreTrendsPowerResults.pre_period_ses
+      ~PreTrendsPowerResults.vcov
+   
+   
\ No newline at end of file
diff --git a/docs/api/_autosummary/diff_diff.SABootstrapResults.rst b/docs/api/_autosummary/diff_diff.SABootstrapResults.rst
new file mode 100644
index 0000000..a39d3b3
--- /dev/null
+++ b/docs/api/_autosummary/diff_diff.SABootstrapResults.rst
@@ -0,0 +1,37 @@
+diff\_diff.SABootstrapResults
+=============================
+
+.. currentmodule:: diff_diff
+
+.. autoclass:: SABootstrapResults
+
+   
+   .. automethod:: __init__
+
+   
+   .. rubric:: Methods
+
+   .. autosummary::
+   
+      ~SABootstrapResults.__init__
+   
+   
+
+   
+   
+   .. rubric:: Attributes
+
+   .. autosummary::
+   
+      ~SABootstrapResults.bootstrap_distribution
+      ~SABootstrapResults.n_bootstrap
+      ~SABootstrapResults.weight_type
+      ~SABootstrapResults.alpha
+      ~SABootstrapResults.overall_att_se
+      ~SABootstrapResults.overall_att_ci
+      ~SABootstrapResults.overall_att_p_value
+      ~SABootstrapResults.event_study_ses
+      ~SABootstrapResults.event_study_cis
+      ~SABootstrapResults.event_study_p_values
+   
+   
\ No newline at end of file
diff --git a/docs/api/_autosummary/diff_diff.SimulationPowerResults.rst b/docs/api/_autosummary/diff_diff.SimulationPowerResults.rst
new file mode 100644
index 0000000..0a4f46e
--- /dev/null
+++ b/docs/api/_autosummary/diff_diff.SimulationPowerResults.rst
@@ -0,0 +1,49 @@
+diff\_diff.SimulationPowerResults
+=================================
+
+.. currentmodule:: diff_diff
+
+.. autoclass:: SimulationPowerResults
+
+   
+   .. automethod:: __init__
+
+   
+   .. rubric:: Methods
+
+   .. autosummary::
+   
+      ~SimulationPowerResults.__init__
+      ~SimulationPowerResults.power_curve_df
+      ~SimulationPowerResults.print_summary
+      ~SimulationPowerResults.summary
+      ~SimulationPowerResults.to_dataframe
+      ~SimulationPowerResults.to_dict
+   
+   
+
+   
+   
+   .. rubric:: Attributes
+
+   .. autosummary::
+   
+      ~SimulationPowerResults.simulation_results
+      ~SimulationPowerResults.power
+      ~SimulationPowerResults.power_se
+      ~SimulationPowerResults.power_ci
+      ~SimulationPowerResults.rejection_rate
+      ~SimulationPowerResults.mean_estimate
+      ~SimulationPowerResults.std_estimate
+      ~SimulationPowerResults.mean_se
+      ~SimulationPowerResults.coverage
+      ~SimulationPowerResults.n_simulations
+      ~SimulationPowerResults.effect_sizes
+      ~SimulationPowerResults.powers
+      ~SimulationPowerResults.true_effect
+      ~SimulationPowerResults.alpha
+      ~SimulationPowerResults.estimator_name
+      ~SimulationPowerResults.bias
+      ~SimulationPowerResults.rmse
+   
+   
\ No newline at end of file
diff --git a/docs/api/_autosummary/diff_diff.StackedDiD.rst b/docs/api/_autosummary/diff_diff.StackedDiD.rst
new file mode 100644
index 0000000..50546d3
--- /dev/null
+++ b/docs/api/_autosummary/diff_diff.StackedDiD.rst
@@ -0,0 +1,27 @@
+diff\_diff.StackedDiD
+=====================
+
+.. currentmodule:: diff_diff
+
+.. autoclass:: StackedDiD
+
+   
+   .. automethod:: __init__
+
+   
+   .. rubric:: Methods
+
+   .. autosummary::
+   
+      ~StackedDiD.__init__
+      ~StackedDiD.fit
+      ~StackedDiD.get_params
+      ~StackedDiD.print_summary
+      ~StackedDiD.set_params
+      ~StackedDiD.summary
+   
+   
+
+   
+   
+   
\ No newline at end of file
diff --git a/docs/api/_autosummary/diff_diff.StackedDiDResults.rst b/docs/api/_autosummary/diff_diff.StackedDiDResults.rst
new file mode 100644
index 0000000..b958630
--- /dev/null
+++ b/docs/api/_autosummary/diff_diff.StackedDiDResults.rst
@@ -0,0 +1,53 @@
+diff\_diff.StackedDiDResults
+============================
+
+.. currentmodule:: diff_diff
+
+.. autoclass:: StackedDiDResults
+
+   
+   .. automethod:: __init__
+
+   
+   .. rubric:: Methods
+
+   .. autosummary::
+   
+      ~StackedDiDResults.__init__
+      ~StackedDiDResults.print_summary
+      ~StackedDiDResults.summary
+      ~StackedDiDResults.to_dataframe
+   
+   
+
+   
+   
+   .. rubric:: Attributes
+
+   .. autosummary::
+   
+      ~StackedDiDResults.alpha
+      ~StackedDiDResults.clean_control
+      ~StackedDiDResults.is_significant
+      ~StackedDiDResults.kappa_post
+      ~StackedDiDResults.kappa_pre
+      ~StackedDiDResults.n_control_units
+      ~StackedDiDResults.n_obs
+      ~StackedDiDResults.n_stacked_obs
+      ~StackedDiDResults.n_sub_experiments
+      ~StackedDiDResults.n_treated_units
+      ~StackedDiDResults.significance_stars
+      ~StackedDiDResults.weighting
+      ~StackedDiDResults.overall_att
+      ~StackedDiDResults.overall_se
+      ~StackedDiDResults.overall_t_stat
+      ~StackedDiDResults.overall_p_value
+      ~StackedDiDResults.overall_conf_int
+      ~StackedDiDResults.event_study_effects
+      ~StackedDiDResults.group_effects
+      ~StackedDiDResults.stacked_data
+      ~StackedDiDResults.groups
+      ~StackedDiDResults.trimmed_groups
+      ~StackedDiDResults.time_periods
+   
+   
\ No newline at end of file
diff --git a/docs/api/_autosummary/diff_diff.SunAbraham.rst b/docs/api/_autosummary/diff_diff.SunAbraham.rst
new file mode 100644
index 0000000..eadb360
--- /dev/null
+++ b/docs/api/_autosummary/diff_diff.SunAbraham.rst
@@ -0,0 +1,27 @@
+diff\_diff.SunAbraham
+=====================
+
+.. currentmodule:: diff_diff
+
+.. autoclass:: SunAbraham
+
+   
+   .. automethod:: __init__
+
+   
+   .. rubric:: Methods
+
+   .. autosummary::
+   
+      ~SunAbraham.__init__
+      ~SunAbraham.fit
+      ~SunAbraham.get_params
+      ~SunAbraham.print_summary
+      ~SunAbraham.set_params
+      ~SunAbraham.summary
+   
+   
+
+   
+   
+   
\ No newline at end of file
diff --git a/docs/api/_autosummary/diff_diff.SunAbrahamResults.rst b/docs/api/_autosummary/diff_diff.SunAbrahamResults.rst
new file mode 100644
index 0000000..fb25997
--- /dev/null
+++ b/docs/api/_autosummary/diff_diff.SunAbrahamResults.rst
@@ -0,0 +1,48 @@
+diff\_diff.SunAbrahamResults
+============================
+
+.. currentmodule:: diff_diff
+
+.. autoclass:: SunAbrahamResults
+
+   
+   .. automethod:: __init__
+
+   
+   .. rubric:: Methods
+
+   .. autosummary::
+   
+      ~SunAbrahamResults.__init__
+      ~SunAbrahamResults.print_summary
+      ~SunAbrahamResults.summary
+      ~SunAbrahamResults.to_dataframe
+   
+   
+
+   
+   
+   .. rubric:: Attributes
+
+   .. autosummary::
+   
+      ~SunAbrahamResults.alpha
+      ~SunAbrahamResults.bootstrap_results
+      ~SunAbrahamResults.cohort_effects
+      ~SunAbrahamResults.control_group
+      ~SunAbrahamResults.is_significant
+      ~SunAbrahamResults.significance_stars
+      ~SunAbrahamResults.event_study_effects
+      ~SunAbrahamResults.overall_att
+      ~SunAbrahamResults.overall_se
+      ~SunAbrahamResults.overall_t_stat
+      ~SunAbrahamResults.overall_p_value
+      ~SunAbrahamResults.overall_conf_int
+      ~SunAbrahamResults.cohort_weights
+      ~SunAbrahamResults.groups
+      ~SunAbrahamResults.time_periods
+      ~SunAbrahamResults.n_obs
+      ~SunAbrahamResults.n_treated_units
+      ~SunAbrahamResults.n_control_units
+   
+   
\ No newline at end of file
diff --git a/docs/api/_autosummary/diff_diff.SyntheticDiDResults.rst b/docs/api/_autosummary/diff_diff.SyntheticDiDResults.rst
index ef74b20..1b996bd 100644
--- a/docs/api/_autosummary/diff_diff.SyntheticDiDResults.rst
+++ b/docs/api/_autosummary/diff_diff.SyntheticDiDResults.rst
@@ -31,9 +31,14 @@
    
       ~SyntheticDiDResults.alpha
       ~SyntheticDiDResults.is_significant
+      ~SyntheticDiDResults.n_bootstrap
+      ~SyntheticDiDResults.noise_level
       ~SyntheticDiDResults.placebo_effects
       ~SyntheticDiDResults.pre_treatment_fit
       ~SyntheticDiDResults.significance_stars
+      ~SyntheticDiDResults.variance_method
+      ~SyntheticDiDResults.zeta_lambda
+      ~SyntheticDiDResults.zeta_omega
       ~SyntheticDiDResults.att
       ~SyntheticDiDResults.se
       ~SyntheticDiDResults.t_stat
@@ -46,10 +51,5 @@
       ~SyntheticDiDResults.time_weights
       ~SyntheticDiDResults.pre_periods
       ~SyntheticDiDResults.post_periods
-      ~SyntheticDiDResults.variance_method
-      ~SyntheticDiDResults.noise_level
-      ~SyntheticDiDResults.zeta_omega
-      ~SyntheticDiDResults.zeta_lambda
-      ~SyntheticDiDResults.n_bootstrap
    
    
\ No newline at end of file
diff --git a/docs/api/_autosummary/diff_diff.TROP.rst b/docs/api/_autosummary/diff_diff.TROP.rst
new file mode 100644
index 0000000..c223329
--- /dev/null
+++ b/docs/api/_autosummary/diff_diff.TROP.rst
@@ -0,0 +1,31 @@
+﻿diff\_diff.TROP
+===============
+
+.. currentmodule:: diff_diff
+
+.. autoclass:: TROP
+
+   
+   .. automethod:: __init__
+
+   
+   .. rubric:: Methods
+
+   .. autosummary::
+   
+      ~TROP.__init__
+      ~TROP.fit
+      ~TROP.get_params
+      ~TROP.set_params
+   
+   
+
+   
+   
+   .. rubric:: Attributes
+
+   .. autosummary::
+   
+      ~TROP.CONVERGENCE_TOL_SVD
+   
+   
\ No newline at end of file
diff --git a/docs/api/_autosummary/diff_diff.TROPResults.rst b/docs/api/_autosummary/diff_diff.TROPResults.rst
new file mode 100644
index 0000000..a111fe3
--- /dev/null
+++ b/docs/api/_autosummary/diff_diff.TROPResults.rst
@@ -0,0 +1,59 @@
+﻿diff\_diff.TROPResults
+======================
+
+.. currentmodule:: diff_diff
+
+.. autoclass:: TROPResults
+
+   
+   .. automethod:: __init__
+
+   
+   .. rubric:: Methods
+
+   .. autosummary::
+   
+      ~TROPResults.__init__
+      ~TROPResults.get_time_effects_df
+      ~TROPResults.get_treatment_effects_df
+      ~TROPResults.get_unit_effects_df
+      ~TROPResults.print_summary
+      ~TROPResults.summary
+      ~TROPResults.to_dataframe
+      ~TROPResults.to_dict
+   
+   
+
+   
+   
+   .. rubric:: Attributes
+
+   .. autosummary::
+   
+      ~TROPResults.alpha
+      ~TROPResults.bootstrap_distribution
+      ~TROPResults.is_significant
+      ~TROPResults.n_bootstrap
+      ~TROPResults.n_post_periods
+      ~TROPResults.n_pre_periods
+      ~TROPResults.significance_stars
+      ~TROPResults.att
+      ~TROPResults.se
+      ~TROPResults.t_stat
+      ~TROPResults.p_value
+      ~TROPResults.conf_int
+      ~TROPResults.n_obs
+      ~TROPResults.n_treated
+      ~TROPResults.n_control
+      ~TROPResults.n_treated_obs
+      ~TROPResults.unit_effects
+      ~TROPResults.time_effects
+      ~TROPResults.treatment_effects
+      ~TROPResults.lambda_time
+      ~TROPResults.lambda_unit
+      ~TROPResults.lambda_nn
+      ~TROPResults.factor_matrix
+      ~TROPResults.effective_rank
+      ~TROPResults.loocv_score
+   
+   
\ No newline at end of file
diff --git a/docs/api/_autosummary/diff_diff.TripleDifference.rst b/docs/api/_autosummary/diff_diff.TripleDifference.rst
new file mode 100644
index 0000000..3762132
--- /dev/null
+++ b/docs/api/_autosummary/diff_diff.TripleDifference.rst
@@ -0,0 +1,27 @@
+﻿diff\_diff.TripleDifference
+===========================
+
+.. currentmodule:: diff_diff
+
+.. autoclass:: TripleDifference
+
+   
+   .. automethod:: __init__
+
+   
+   .. rubric:: Methods
+
+   .. autosummary::
+   
+      ~TripleDifference.__init__
+      ~TripleDifference.fit
+      ~TripleDifference.get_params
+      ~TripleDifference.print_summary
+      ~TripleDifference.set_params
+      ~TripleDifference.summary
+   
+   
+
+   
+   
+   
\ No newline at end of file
diff --git a/docs/api/_autosummary/diff_diff.TripleDifferenceResults.rst b/docs/api/_autosummary/diff_diff.TripleDifferenceResults.rst
new file mode 100644
index 0000000..cfaf236
--- /dev/null
+++ b/docs/api/_autosummary/diff_diff.TripleDifferenceResults.rst
@@ -0,0 +1,52 @@
+﻿diff\_diff.TripleDifferenceResults
+==================================
+
+.. currentmodule:: diff_diff
+
+.. autoclass:: TripleDifferenceResults
+
+   
+   .. automethod:: __init__
+
+   
+   .. rubric:: Methods
+
+   .. autosummary::
+   
+      ~TripleDifferenceResults.__init__
+      ~TripleDifferenceResults.print_summary
+      ~TripleDifferenceResults.summary
+      ~TripleDifferenceResults.to_dataframe
+      ~TripleDifferenceResults.to_dict
+   
+   
+
+   
+   
+   .. rubric:: Attributes
+
+   .. autosummary::
+   
+      ~TripleDifferenceResults.alpha
+      ~TripleDifferenceResults.covariate_balance
+      ~TripleDifferenceResults.group_means
+      ~TripleDifferenceResults.inference_method
+      ~TripleDifferenceResults.is_significant
+      ~TripleDifferenceResults.n_bootstrap
+      ~TripleDifferenceResults.n_clusters
+      ~TripleDifferenceResults.pscore_stats
+      ~TripleDifferenceResults.r_squared
+      ~TripleDifferenceResults.significance_stars
+      ~TripleDifferenceResults.att
+      ~TripleDifferenceResults.se
+      ~TripleDifferenceResults.t_stat
+      ~TripleDifferenceResults.p_value
+      ~TripleDifferenceResults.conf_int
+      ~TripleDifferenceResults.n_obs
+      ~TripleDifferenceResults.n_treated_eligible
+      ~TripleDifferenceResults.n_treated_ineligible
+      ~TripleDifferenceResults.n_control_eligible
+      ~TripleDifferenceResults.n_control_ineligible
+      ~TripleDifferenceResults.estimation_method
+   
+   
\ No newline at end of file
diff --git a/docs/api/_autosummary/diff_diff.TwoStageBootstrapResults.rst b/docs/api/_autosummary/diff_diff.TwoStageBootstrapResults.rst
new file mode 100644
index 0000000..f38b4e4
--- /dev/null
+++ b/docs/api/_autosummary/diff_diff.TwoStageBootstrapResults.rst
@@ -0,0 +1,40 @@
+﻿diff\_diff.TwoStageBootstrapResults
+===================================
+
+.. currentmodule:: diff_diff
+
+.. autoclass:: TwoStageBootstrapResults
+
+   
+   .. automethod:: __init__
+
+   
+   .. rubric:: Methods
+
+   .. autosummary::
+   
+      ~TwoStageBootstrapResults.__init__
+   
+   
+
+   
+   
+   .. rubric:: Attributes
+
+   .. autosummary::
+   
+      ~TwoStageBootstrapResults.bootstrap_distribution
+      ~TwoStageBootstrapResults.event_study_cis
+      ~TwoStageBootstrapResults.event_study_p_values
+      ~TwoStageBootstrapResults.event_study_ses
+      ~TwoStageBootstrapResults.group_cis
+      ~TwoStageBootstrapResults.group_p_values
+      ~TwoStageBootstrapResults.group_ses
+      ~TwoStageBootstrapResults.n_bootstrap
+      ~TwoStageBootstrapResults.weight_type
+      ~TwoStageBootstrapResults.alpha
+      ~TwoStageBootstrapResults.overall_att_se
+      ~TwoStageBootstrapResults.overall_att_ci
+      ~TwoStageBootstrapResults.overall_att_p_value
+   
+   
\ No newline at end of file
diff --git a/docs/api/_autosummary/diff_diff.TwoStageDiD.rst b/docs/api/_autosummary/diff_diff.TwoStageDiD.rst
new file mode 100644
index 0000000..648b813
--- /dev/null
+++ b/docs/api/_autosummary/diff_diff.TwoStageDiD.rst
@@ -0,0 +1,27 @@
+﻿diff\_diff.TwoStageDiD
+======================
+
+.. currentmodule:: diff_diff
+
+.. autoclass:: TwoStageDiD
+
+   
+   .. automethod:: __init__
+
+   
+   .. rubric:: Methods
+
+   .. autosummary::
+   
+      ~TwoStageDiD.__init__
+      ~TwoStageDiD.fit
+      ~TwoStageDiD.get_params
+      ~TwoStageDiD.print_summary
+      ~TwoStageDiD.set_params
+      ~TwoStageDiD.summary
+   
+   
+
+   
+   
+   
\ No newline at end of file
diff --git a/docs/api/_autosummary/diff_diff.TwoStageDiDResults.rst b/docs/api/_autosummary/diff_diff.TwoStageDiDResults.rst
new file mode 100644
index 0000000..624a26d
--- /dev/null
+++ b/docs/api/_autosummary/diff_diff.TwoStageDiDResults.rst
@@ -0,0 +1,49 @@
+﻿diff\_diff.TwoStageDiDResults
+=============================
+
+.. currentmodule:: diff_diff
+
+.. autoclass:: TwoStageDiDResults
+
+   
+   .. automethod:: __init__
+
+   
+   .. rubric:: Methods
+
+   .. autosummary::
+   
+      ~TwoStageDiDResults.__init__
+      ~TwoStageDiDResults.print_summary
+      ~TwoStageDiDResults.summary
+      ~TwoStageDiDResults.to_dataframe
+   
+   
+
+   
+   
+   .. rubric:: Attributes
+
+   .. autosummary::
+   
+      ~TwoStageDiDResults.alpha
+      ~TwoStageDiDResults.bootstrap_results
+      ~TwoStageDiDResults.is_significant
+      ~TwoStageDiDResults.significance_stars
+      ~TwoStageDiDResults.treatment_effects
+      ~TwoStageDiDResults.overall_att
+      ~TwoStageDiDResults.overall_se
+      ~TwoStageDiDResults.overall_t_stat
+      ~TwoStageDiDResults.overall_p_value
+      ~TwoStageDiDResults.overall_conf_int
+      ~TwoStageDiDResults.event_study_effects
+      ~TwoStageDiDResults.group_effects
+      ~TwoStageDiDResults.groups
+      ~TwoStageDiDResults.time_periods
+      ~TwoStageDiDResults.n_obs
+      ~TwoStageDiDResults.n_treated_obs
+      ~TwoStageDiDResults.n_untreated_obs
+      ~TwoStageDiDResults.n_treated_units
+      ~TwoStageDiDResults.n_control_units
+   
+   
\ No newline at end of file
diff --git a/docs/api/_autosummary/diff_diff.TwoWayFixedEffects.rst b/docs/api/_autosummary/diff_diff.TwoWayFixedEffects.rst
index ef9d860..8eddb62 100644
--- a/docs/api/_autosummary/diff_diff.TwoWayFixedEffects.rst
+++ b/docs/api/_autosummary/diff_diff.TwoWayFixedEffects.rst
@@ -14,6 +14,7 @@
    .. autosummary::
    
       ~TwoWayFixedEffects.__init__
+      ~TwoWayFixedEffects.decompose
       ~TwoWayFixedEffects.fit
       ~TwoWayFixedEffects.get_params
       ~TwoWayFixedEffects.predict
diff --git a/docs/api/_autosummary/diff_diff.clear_cache.rst b/docs/api/_autosummary/diff_diff.clear_cache.rst
new file mode 100644
index 0000000..3a0a778
--- /dev/null
+++ b/docs/api/_autosummary/diff_diff.clear_cache.rst
@@ -0,0 +1,6 @@
+﻿diff\_diff.clear\_cache
+=======================
+
+.. currentmodule:: diff_diff
+
+.. autofunction:: clear_cache
\ No newline at end of file
diff --git a/docs/api/_autosummary/diff_diff.compute_mde.rst b/docs/api/_autosummary/diff_diff.compute_mde.rst
new file mode 100644
index 0000000..2962fb3
--- /dev/null
+++ b/docs/api/_autosummary/diff_diff.compute_mde.rst
@@ -0,0 +1,6 @@
+﻿diff\_diff.compute\_mde
+=======================
+
+.. currentmodule:: diff_diff
+
+.. autofunction:: compute_mde
\ No newline at end of file
diff --git a/docs/api/_autosummary/diff_diff.compute_mdv.rst b/docs/api/_autosummary/diff_diff.compute_mdv.rst
new file mode 100644
index 0000000..a015be7
--- /dev/null
+++ b/docs/api/_autosummary/diff_diff.compute_mdv.rst
@@ -0,0 +1,6 @@
+﻿diff\_diff.compute\_mdv
+=======================
+
+.. currentmodule:: diff_diff
+
+.. autofunction:: compute_mdv
\ No newline at end of file
diff --git a/docs/api/_autosummary/diff_diff.compute_power.rst b/docs/api/_autosummary/diff_diff.compute_power.rst
new file mode 100644
index 0000000..5afd424
--- /dev/null
+++ b/docs/api/_autosummary/diff_diff.compute_power.rst
@@ -0,0 +1,6 @@
+﻿diff\_diff.compute\_power
+=========================
+
+.. currentmodule:: diff_diff
+
+.. autofunction:: compute_power
\ No newline at end of file
diff --git a/docs/api/_autosummary/diff_diff.compute_pretrends_power.rst b/docs/api/_autosummary/diff_diff.compute_pretrends_power.rst
new file mode 100644
index 0000000..776dd82
--- /dev/null
+++ b/docs/api/_autosummary/diff_diff.compute_pretrends_power.rst
@@ -0,0 +1,6 @@
+﻿diff\_diff.compute\_pretrends\_power
+====================================
+
+.. currentmodule:: diff_diff
+
+.. autofunction:: compute_pretrends_power
\ No newline at end of file
diff --git a/docs/api/_autosummary/diff_diff.compute_sample_size.rst b/docs/api/_autosummary/diff_diff.compute_sample_size.rst
new file mode 100644
index 0000000..e26beba
--- /dev/null
+++ b/docs/api/_autosummary/diff_diff.compute_sample_size.rst
@@ -0,0 +1,6 @@
+﻿diff\_diff.compute\_sample\_size
+================================
+
+.. currentmodule:: diff_diff
+
+.. autofunction:: compute_sample_size
\ No newline at end of file
diff --git a/docs/api/_autosummary/diff_diff.generate_continuous_did_data.rst b/docs/api/_autosummary/diff_diff.generate_continuous_did_data.rst
new file mode 100644
index 0000000..7302b87
--- /dev/null
+++ b/docs/api/_autosummary/diff_diff.generate_continuous_did_data.rst
@@ -0,0 +1,6 @@
+﻿diff\_diff.generate\_continuous\_did\_data
+==========================================
+
+.. currentmodule:: diff_diff
+
+.. autofunction:: generate_continuous_did_data
\ No newline at end of file
diff --git a/docs/api/_autosummary/diff_diff.generate_ddd_data.rst b/docs/api/_autosummary/diff_diff.generate_ddd_data.rst
new file mode 100644
index 0000000..9c511d3
--- /dev/null
+++ b/docs/api/_autosummary/diff_diff.generate_ddd_data.rst
@@ -0,0 +1,6 @@
+﻿diff\_diff.generate\_ddd\_data
+==============================
+
+.. currentmodule:: diff_diff
+
+.. autofunction:: generate_ddd_data
\ No newline at end of file
diff --git a/docs/api/_autosummary/diff_diff.generate_event_study_data.rst b/docs/api/_autosummary/diff_diff.generate_event_study_data.rst
new file mode 100644
index 0000000..15d7134
--- /dev/null
+++ b/docs/api/_autosummary/diff_diff.generate_event_study_data.rst
@@ -0,0 +1,6 @@
+﻿diff\_diff.generate\_event\_study\_data
+=======================================
+
+.. currentmodule:: diff_diff
+
+.. autofunction:: generate_event_study_data
\ No newline at end of file
diff --git a/docs/api/_autosummary/diff_diff.generate_factor_data.rst b/docs/api/_autosummary/diff_diff.generate_factor_data.rst
new file mode 100644
index 0000000..f4c3c0f
--- /dev/null
+++ b/docs/api/_autosummary/diff_diff.generate_factor_data.rst
@@ -0,0 +1,6 @@
+﻿diff\_diff.generate\_factor\_data
+=================================
+
+.. currentmodule:: diff_diff
+
+.. autofunction:: generate_factor_data
\ No newline at end of file
diff --git a/docs/api/_autosummary/diff_diff.generate_panel_data.rst b/docs/api/_autosummary/diff_diff.generate_panel_data.rst
new file mode 100644
index 0000000..48f2504
--- /dev/null
+++ b/docs/api/_autosummary/diff_diff.generate_panel_data.rst
@@ -0,0 +1,6 @@
+﻿diff\_diff.generate\_panel\_data
+================================
+
+.. currentmodule:: diff_diff
+
+.. autofunction:: generate_panel_data
\ No newline at end of file
diff --git a/docs/api/_autosummary/diff_diff.generate_staggered_data.rst b/docs/api/_autosummary/diff_diff.generate_staggered_data.rst
new file mode 100644
index 0000000..18686f5
--- /dev/null
+++ b/docs/api/_autosummary/diff_diff.generate_staggered_data.rst
@@ -0,0 +1,6 @@
+﻿diff\_diff.generate\_staggered\_data
+====================================
+
+.. currentmodule:: diff_diff
+
+.. autofunction:: generate_staggered_data
\ No newline at end of file
diff --git a/docs/api/_autosummary/diff_diff.list_datasets.rst b/docs/api/_autosummary/diff_diff.list_datasets.rst
new file mode 100644
index 0000000..25f2715
--- /dev/null
+++ b/docs/api/_autosummary/diff_diff.list_datasets.rst
@@ -0,0 +1,6 @@
+﻿diff\_diff.list\_datasets
+=========================
+
+.. currentmodule:: diff_diff
+
+.. autofunction:: list_datasets
\ No newline at end of file
diff --git a/docs/api/_autosummary/diff_diff.load_card_krueger.rst b/docs/api/_autosummary/diff_diff.load_card_krueger.rst
new file mode 100644
index 0000000..9a8191c
--- /dev/null
+++ b/docs/api/_autosummary/diff_diff.load_card_krueger.rst
@@ -0,0 +1,6 @@
+﻿diff\_diff.load\_card\_krueger
+==============================
+
+.. currentmodule:: diff_diff
+
+.. autofunction:: load_card_krueger
\ No newline at end of file
diff --git a/docs/api/_autosummary/diff_diff.load_castle_doctrine.rst b/docs/api/_autosummary/diff_diff.load_castle_doctrine.rst
new file mode 100644
index 0000000..65729d6
--- /dev/null
+++ b/docs/api/_autosummary/diff_diff.load_castle_doctrine.rst
@@ -0,0 +1,6 @@
+﻿diff\_diff.load\_castle\_doctrine
+=================================
+
+.. currentmodule:: diff_diff
+
+.. autofunction:: load_castle_doctrine
\ No newline at end of file
diff --git a/docs/api/_autosummary/diff_diff.load_dataset.rst b/docs/api/_autosummary/diff_diff.load_dataset.rst
new file mode 100644
index 0000000..a39ef3c
--- /dev/null
+++ b/docs/api/_autosummary/diff_diff.load_dataset.rst
@@ -0,0 +1,6 @@
+﻿diff\_diff.load\_dataset
+========================
+
+.. currentmodule:: diff_diff
+
+.. autofunction:: load_dataset
\ No newline at end of file
diff --git a/docs/api/_autosummary/diff_diff.load_divorce_laws.rst b/docs/api/_autosummary/diff_diff.load_divorce_laws.rst
new file mode 100644
index 0000000..7ba295a
--- /dev/null
+++ b/docs/api/_autosummary/diff_diff.load_divorce_laws.rst
@@ -0,0 +1,6 @@
+﻿diff\_diff.load\_divorce\_laws
+==============================
+
+.. currentmodule:: diff_diff
+
+.. autofunction:: load_divorce_laws
\ No newline at end of file
diff --git a/docs/api/_autosummary/diff_diff.load_mpdta.rst b/docs/api/_autosummary/diff_diff.load_mpdta.rst
new file mode 100644
index 0000000..bda15b2
--- /dev/null
+++ b/docs/api/_autosummary/diff_diff.load_mpdta.rst
@@ -0,0 +1,6 @@
+﻿diff\_diff.load\_mpdta
+======================
+
+.. currentmodule:: diff_diff
+
+.. autofunction:: load_mpdta
\ No newline at end of file
diff --git a/docs/api/_autosummary/diff_diff.plot_bacon.rst b/docs/api/_autosummary/diff_diff.plot_bacon.rst
new file mode 100644
index 0000000..b02c5bb
--- /dev/null
+++ b/docs/api/_autosummary/diff_diff.plot_bacon.rst
@@ -0,0 +1,6 @@
+﻿diff\_diff.plot\_bacon
+======================
+
+.. currentmodule:: diff_diff
+
+.. autofunction:: plot_bacon
\ No newline at end of file
diff --git a/docs/api/_autosummary/diff_diff.plot_power_curve.rst b/docs/api/_autosummary/diff_diff.plot_power_curve.rst
new file mode 100644
index 0000000..b70ac6c
--- /dev/null
+++ b/docs/api/_autosummary/diff_diff.plot_power_curve.rst
@@ -0,0 +1,6 @@
+﻿diff\_diff.plot\_power\_curve
+=============================
+
+.. currentmodule:: diff_diff
+
+.. autofunction:: plot_power_curve
\ No newline at end of file
diff --git a/docs/api/_autosummary/diff_diff.plot_pretrends_power.rst b/docs/api/_autosummary/diff_diff.plot_pretrends_power.rst
new file mode 100644
index 0000000..9bb17a2
--- /dev/null
+++ b/docs/api/_autosummary/diff_diff.plot_pretrends_power.rst
@@ -0,0 +1,6 @@
+﻿diff\_diff.plot\_pretrends\_power
+=================================
+
+.. currentmodule:: diff_diff
+
+.. autofunction:: plot_pretrends_power
\ No newline at end of file
diff --git a/docs/api/_autosummary/diff_diff.simulate_power.rst b/docs/api/_autosummary/diff_diff.simulate_power.rst
new file mode 100644
index 0000000..76c35f4
--- /dev/null
+++ b/docs/api/_autosummary/diff_diff.simulate_power.rst
@@ -0,0 +1,6 @@
+﻿diff\_diff.simulate\_power
+==========================
+
+.. currentmodule:: diff_diff
+
+.. autofunction:: simulate_power
\ No newline at end of file
diff --git a/docs/api/bacon.rst b/docs/api/bacon.rst
new file mode 100644
index 0000000..9f7e388
--- /dev/null
+++ b/docs/api/bacon.rst
@@ -0,0 +1,151 @@
+Bacon Decomposition (Goodman-Bacon 2021)
+=========================================
+
+Diagnostic decomposition of Two-Way Fixed Effects (TWFE) estimators for
+staggered treatment designs.
+
+This module implements the Goodman-Bacon (2021) decomposition, which reveals
+that a TWFE estimate with variation in treatment timing is a weighted average
+of all possible 2x2 Difference-in-Differences comparisons. The decomposition
+exposes the implicit comparisons that drive the TWFE estimate -- including
+potentially problematic "forbidden comparisons" where already-treated units
+serve as controls -- and quantifies their relative importance.
+
+**When to use BaconDecomposition:**
+
+- You have a staggered adoption design and want to diagnose whether the TWFE
+  estimate is driven by clean or problematic comparisons
+- You need to assess the severity of heterogeneous treatment effect bias in
+  existing TWFE results
+- You want to understand *why* TWFE and robust estimators (e.g.,
+  Callaway-Sant'Anna) produce different estimates
+- You are deciding whether a simple TWFE specification is adequate or whether
+  a robust staggered estimator is needed
+
+**Reference:** Goodman-Bacon, A. (2021). Difference-in-differences with
+variation in treatment timing. *Journal of Econometrics*, 225(2), 254-277.
+
+.. module:: diff_diff.bacon
+
+BaconDecomposition
+------------------
+
+Main estimator class for the Goodman-Bacon decomposition.
+
+.. autoclass:: diff_diff.BaconDecomposition
+   :members:
+   :undoc-members:
+   :show-inheritance:
+   :inherited-members:
+
+   .. rubric:: Methods
+
+   .. autosummary::
+
+      ~BaconDecomposition.fit
+      ~BaconDecomposition.get_params
+      ~BaconDecomposition.set_params
+
+BaconDecompositionResults
+-------------------------
+
+Results container for the Bacon decomposition.
+
+.. autoclass:: diff_diff.bacon.BaconDecompositionResults
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+   .. rubric:: Methods
+
+   .. autosummary::
+
+      ~BaconDecompositionResults.summary
+      ~BaconDecompositionResults.print_summary
+      ~BaconDecompositionResults.to_dataframe
+      ~BaconDecompositionResults.weight_by_type
+      ~BaconDecompositionResults.effect_by_type
+
+Comparison2x2
+-------------
+
+Container for an individual 2x2 DiD comparison within the decomposition.
+
+.. autoclass:: diff_diff.bacon.Comparison2x2
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+Convenience Function
+--------------------
+
+.. autofunction:: diff_diff.bacon_decompose
+
+Example Usage
+-------------
+
+Basic usage::
+
+    from diff_diff import BaconDecomposition, generate_staggered_data
+
+    data = generate_staggered_data(n_units=200, n_periods=12,
+                                    cohort_periods=[4, 6, 8], seed=42)
+
+    bacon = BaconDecomposition()
+    results = bacon.fit(data, outcome='outcome', unit='unit',
+                        time='period', first_treat='first_treat')
+    results.print_summary()
+
+Visualizing with ``plot_bacon``::
+
+    from diff_diff import plot_bacon
+
+    # Scatter plot of 2x2 estimates vs weights, colored by comparison type
+    ax = plot_bacon(results)
+    ax.figure.show()
+
+Interpreting the decomposition::
+
+    # Convert to DataFrame for detailed inspection
+    df = results.to_dataframe()
+    print(df[['treated_group', 'control_group', 'comparison_type',
+              'estimate', 'weight']])
+
+    # Check weight breakdown by comparison type
+    weights = results.weight_by_type()
+    print(f"Treated vs Never-treated: {weights['treated_vs_never']:.1%}")
+    print(f"Earlier vs Later:         {weights['earlier_vs_later']:.1%}")
+    print(f"Later vs Earlier:         {weights['later_vs_earlier']:.1%}")
+
+    # Compare weighted average effects across comparison types
+    effects = results.effect_by_type()
+    for comp_type, effect in effects.items():
+        if effect is not None:
+            print(f"  {comp_type}: {effect:.4f}")
+
+Using exact weights for publication-quality results::
+
+    bacon = BaconDecomposition(weights='exact')
+    results = bacon.fit(data, outcome='outcome', unit='unit',
+                        time='period', first_treat='first_treat')
+
+    # Verify the weighted sum closely matches the TWFE estimate
+    print(f"TWFE estimate:       {results.twfe_estimate:.4f}")
+    print(f"Decomposition error: {results.decomposition_error:.6f}")
+
+When Is TWFE Reliable?
+----------------------
+
+The Bacon decomposition helps answer whether a standard TWFE regression is
+adequate for a particular dataset. As a rule of thumb:
+
+- **TWFE is likely reliable** when the weight on "later vs earlier" (forbidden)
+  comparisons is small, or when 2x2 estimates are similar across all comparison
+  types. This suggests treatment effect heterogeneity is not meaningfully
+  biasing the TWFE estimate.
+
+- **TWFE may be unreliable** when forbidden comparisons carry substantial weight
+  *and* their estimates differ markedly from the clean comparisons. In this
+  case, consider using a robust staggered estimator such as
+  :class:`~diff_diff.CallawaySantAnna`, :class:`~diff_diff.SunAbraham`, or
+  :class:`~diff_diff.StackedDiD`.
diff --git a/docs/api/datasets.rst b/docs/api/datasets.rst
new file mode 100644
index 0000000..d74f2c6
--- /dev/null
+++ b/docs/api/datasets.rst
@@ -0,0 +1,165 @@
+Datasets
+========
+
+Built-in datasets from published studies, mostly real-world (``mpdta`` is simulated), for examples, tutorials, and testing.
+
+.. module:: diff_diff.datasets
+
+All datasets are downloaded from public sources on first use and cached locally
+at ``~/.cache/diff_diff/datasets/``. Pass ``force_download=True`` to any loader
+to refresh the cache. If the download fails and a cached copy exists, the cached
+version is used automatically.
+
+Dataset Loaders
+---------------
+
+load_card_krueger
+~~~~~~~~~~~~~~~~~
+
+Card & Krueger (1994) minimum wage study. Classic 2x2 DiD comparing fast-food
+employment in New Jersey (treated) and Pennsylvania (control) around NJ's 1992
+minimum wage increase.
+
+.. autofunction:: load_card_krueger
+
+Example
+^^^^^^^
+
+.. code-block:: python
+
+   from diff_diff.datasets import load_card_krueger
+   from diff_diff import DifferenceInDifferences
+
+   ck = load_card_krueger()
+
+   # Reshape to long format for DiD estimation
+   ck_long = ck.melt(
+       id_vars=['store_id', 'state', 'treated'],
+       value_vars=['emp_pre', 'emp_post'],
+       var_name='period', value_name='employment'
+   )
+   ck_long['post'] = (ck_long['period'] == 'emp_post').astype(int)
+
+   did = DifferenceInDifferences()
+   results = did.fit(ck_long, outcome='employment', treatment='treated', time='post')
+
+load_castle_doctrine
+~~~~~~~~~~~~~~~~~~~~
+
+Castle doctrine (Stand Your Ground) gun law study. Staggered adoption of
+self-defense law expansions across U.S. states (2000--2010), suitable for
+Callaway--Sant'Anna or Sun--Abraham estimation.
+
+.. autofunction:: load_castle_doctrine
+
+Example
+^^^^^^^
+
+.. code-block:: python
+
+   from diff_diff.datasets import load_castle_doctrine
+   from diff_diff import CallawaySantAnna
+
+   castle = load_castle_doctrine()
+   cs = CallawaySantAnna(control_group="never_treated")
+   results = cs.fit(
+       castle,
+       outcome="homicide_rate",
+       unit="state",
+       time="year",
+       first_treat="first_treat"
+   )
+
+load_divorce_laws
+~~~~~~~~~~~~~~~~~
+
+Unilateral (no-fault) divorce law reforms. Staggered adoption across U.S.
+states (1968--1988) from Stevenson & Wolfers (2006), with outcomes for divorce
+rate, female labor force participation, and female suicide rate.
+
+.. autofunction:: load_divorce_laws
+
+Example
+^^^^^^^
+
+.. code-block:: python
+
+   from diff_diff.datasets import load_divorce_laws
+   from diff_diff import CallawaySantAnna
+
+   divorce = load_divorce_laws()
+   cs = CallawaySantAnna(control_group="never_treated")
+   results = cs.fit(
+       divorce,
+       outcome="divorce_rate",
+       unit="state",
+       time="year",
+       first_treat="first_treat"
+   )
+
+load_mpdta
+~~~~~~~~~~
+
+Minimum wage panel data for training (Callaway & Sant'Anna 2021). Simulated
+county-level employment data with staggered minimum wage increases (2003--2007),
+from the R ``did`` package.
+
+.. autofunction:: load_mpdta
+
+Example
+^^^^^^^
+
+.. code-block:: python
+
+   from diff_diff.datasets import load_mpdta
+   from diff_diff import CallawaySantAnna
+
+   mpdta = load_mpdta()
+   cs = CallawaySantAnna()
+   results = cs.fit(
+       mpdta,
+       outcome="lemp",
+       unit="countyreal",
+       time="year",
+       first_treat="first_treat"
+   )
+
+Utility Functions
+-----------------
+
+load_dataset
+~~~~~~~~~~~~
+
+Generic loader that fetches a dataset by name.
+
+.. autofunction:: load_dataset
+
+list_datasets
+~~~~~~~~~~~~~
+
+List all available datasets with descriptions.
+
+.. autofunction:: list_datasets
+
+clear_cache
+~~~~~~~~~~~
+
+Remove all cached dataset files from ``~/.cache/diff_diff/datasets/``.
+
+.. autofunction:: clear_cache
+
+Listing and Loading Datasets
+----------------------------
+
+.. code-block:: python
+
+   from diff_diff.datasets import list_datasets, load_dataset
+
+   # See what's available
+   for name, description in list_datasets().items():
+       print(f"{name}: {description}")
+
+   # Load by name
+   df = load_dataset("card_krueger")
+   print(df.shape)
+   print(df.columns.tolist())
diff --git a/docs/api/diagnostics.rst b/docs/api/diagnostics.rst
index afc16e4..218e035 100644
--- a/docs/api/diagnostics.rst
+++ b/docs/api/diagnostics.rst
@@ -30,14 +30,12 @@ Example
    result = placebo_timing_test(
        data,
        outcome='y',
-       treated='treated',
+       treatment='treated',
        time='period',
-       unit='unit_id',
-       true_treatment_start=5,
-       placebo_treatment_start=3  # Test earlier period
+       fake_treatment_period=3  # Test earlier period
    )
 
-   print(f"Placebo effect: {result.effect:.3f}")
+   print(f"Placebo effect: {result.placebo_effect:.3f}")
    print(f"p-value: {result.p_value:.3f}")
 
 placebo_group_test
@@ -60,12 +58,11 @@ Example
        outcome='y',
        time='period',
        unit='unit_id',
-       treated='treated',
-       post='post'
+       fake_treated_units=[10, 11, 12]  # Assign some control units as fake-treated
    )
 
    # Should find no effect if parallel trends holds
-   print(f"Placebo effect: {result.effect:.3f}")
+   print(f"Placebo effect: {result.placebo_effect:.3f}")
 
 permutation_test
 ----------------
@@ -79,13 +76,15 @@ Example
 
 .. code-block:: python
 
-   from diff_diff import permutation_test
+   from diff_diff import permutation_test, generate_did_data
 
+   panel = generate_did_data(n_units=100, n_periods=10, treatment_effect=2.0)
    result = permutation_test(
-       data,
-       outcome='y',
-       treated='treated',
-       post='post',
+       panel,
+       outcome='outcome',
+       treatment='treated',
+       time='post',
+       unit='unit',
        n_permutations=1000
    )
 
@@ -103,18 +102,20 @@ Example
 
 .. code-block:: python
 
-   from diff_diff import leave_one_out_test
+   from diff_diff import leave_one_out_test, generate_did_data
 
+   panel = generate_did_data(n_units=100, n_periods=10, treatment_effect=2.0)
    result = leave_one_out_test(
-       data,
-       outcome='y',
-       treated='treated',
-       post='post',
-       unit='unit_id'
+       panel,
+       outcome='outcome',
+       treatment='treated',
+       time='post',
+       unit='unit'
    )
 
    # Check if results are driven by single units
-   print(f"Effect range: [{result.min_effect:.3f}, {result.max_effect:.3f}]")
+   loo = result.leave_one_out_effects
+   print(f"Effect range: [{min(loo.values()):.3f}, {max(loo.values()):.3f}]")
 
 run_all_placebo_tests
 ---------------------
diff --git a/docs/api/estimators.rst b/docs/api/estimators.rst
index e1480f9..54bf2c3 100644
--- a/docs/api/estimators.rst
+++ b/docs/api/estimators.rst
@@ -81,38 +81,3 @@ Synthetic control combined with DiD (Arkhangelsky et al. 2021).
    :show-inheritance:
    :inherited-members:
 
-TripleDifference (alias: ``DDD``)
-----------------------------------
-
-Triple Difference (DDD) estimator for settings where treatment requires two criteria
-(Ortiz-Villavicencio & Sant'Anna, 2025).
-
-.. module:: diff_diff.triple_diff
-
-.. autoclass:: diff_diff.TripleDifference
-   :members:
-   :undoc-members:
-   :show-inheritance:
-   :inherited-members:
-
-   .. rubric:: Methods
-
-   .. autosummary::
-
-      ~TripleDifference.fit
-      ~TripleDifference.get_params
-      ~TripleDifference.set_params
-
-TripleDifferenceResults
-~~~~~~~~~~~~~~~~~~~~~~~
-
-Results container for Triple Difference estimation.
-
-.. autoclass:: diff_diff.triple_diff.TripleDifferenceResults
-   :members:
-   :undoc-members:
-
-Convenience Function
-~~~~~~~~~~~~~~~~~~~~
-
-.. autofunction:: diff_diff.triple_difference
diff --git a/docs/api/honest_did.rst b/docs/api/honest_did.rst
index ab0d626..afddfa3 100644
--- a/docs/api/honest_did.rst
+++ b/docs/api/honest_did.rst
@@ -44,20 +44,20 @@ Example
 
 .. code-block:: python
 
-   from diff_diff import MultiPeriodDiD, HonestDiD, DeltaRM
+   from diff_diff import MultiPeriodDiD, HonestDiD
 
    # First fit an event study
-   model = MultiPeriodDiD(reference_period=-1)
-   results = model.fit(data, outcome='y', treated='treated',
-                       time='period', unit='unit_id', treatment_start=5)
+   model = MultiPeriodDiD()
+   results = model.fit(data, outcome='y', treatment='treated',
+                       time='period', unit='unit_id',
+                       post_periods=[5, 6, 7], reference_period=4)
 
    # Compute bounds under relative magnitudes restriction
-   honest = HonestDiD(delta=DeltaRM(M_bar=1.0))
+   honest = HonestDiD(method='relative_magnitude', M=1.0)
    bounds = honest.fit(results)
 
-   print(f"Original CI: [{results.att - 1.96*results.se:.3f}, "
-         f"{results.att + 1.96*results.se:.3f}]")
-   print(f"Robust CI: [{bounds.robust_ci[0]:.3f}, {bounds.robust_ci[1]:.3f}]")
+   print(f"Original estimate: {bounds.original_estimate:.3f}")
+   print(f"Robust CI: [{bounds.ci_lb:.3f}, {bounds.ci_ub:.3f}]")
 
 HonestDiDResults
 ----------------
@@ -138,36 +138,35 @@ Complete Example
    from diff_diff import (
        MultiPeriodDiD,
        HonestDiD,
-       DeltaRM,
-       DeltaSD,
        plot_sensitivity,
        plot_honest_event_study,
    )
 
    # Fit event study
-   model = MultiPeriodDiD(reference_period=-1)
-   results = model.fit(data, outcome='y', treated='treated',
-                       time='period', unit='unit_id', treatment_start=5)
+   model = MultiPeriodDiD()
+   results = model.fit(data, outcome='y', treatment='treated',
+                       time='period', unit='unit_id',
+                       post_periods=[5, 6, 7], reference_period=4)
 
    # Sensitivity analysis under relative magnitudes
-   honest_rm = HonestDiD(delta=DeltaRM(M_bar=1.0))
+   honest_rm = HonestDiD(method='relative_magnitude', M=1.0)
    sensitivity_rm = honest_rm.sensitivity_analysis(
        results,
-       M_grid=np.linspace(0, 2, 21)
+       M_grid=np.linspace(0, 2, 21).tolist()
    )
 
    # Find breakdown value
    breakdown = honest_rm.breakdown_value(results)
-   print(f"Breakdown M̄: {breakdown:.3f}")
+   print(f"Breakdown M̄: {breakdown}")
 
    # Plot sensitivity
-   fig1 = plot_sensitivity(sensitivity_rm)
-   fig1.savefig('sensitivity_rm.png')
+   ax1 = plot_sensitivity(sensitivity_rm)
+   ax1.figure.savefig('sensitivity_rm.png')
 
    # Event study with honest CIs
    bounds = honest_rm.fit(results)
-   fig2 = plot_honest_event_study(results, bounds)
-   fig2.savefig('honest_event_study.png')
+   ax2 = plot_honest_event_study(bounds)
+   ax2.figure.savefig('honest_event_study.png')
 
 References
 ----------
diff --git a/docs/api/index.rst b/docs/api/index.rst
index 5a57ee6..d139b76 100644
--- a/docs/api/index.rst
+++ b/docs/api/index.rst
@@ -24,6 +24,8 @@ Core estimator classes for DiD analysis:
    diff_diff.TROP
    diff_diff.ContinuousDiD
    diff_diff.EfficientDiD
+   diff_diff.TwoStageDiD
+   diff_diff.BaconDecomposition
 
 Results Classes
 ---------------
@@ -47,11 +49,15 @@ Result containers returned by estimators:
    diff_diff.ImputationBootstrapResults
    diff_diff.TripleDifferenceResults
    diff_diff.StackedDiDResults
-   diff_diff.trop.TROPResults
+   diff_diff.TROPResults
    diff_diff.ContinuousDiDResults
    diff_diff.DoseResponseCurve
    diff_diff.EfficientDiDResults
    diff_diff.EDiDBootstrapResults
+   diff_diff.TwoStageDiDResults
+   diff_diff.TwoStageBootstrapResults
+   diff_diff.BaconDecompositionResults
+   diff_diff.Comparison2x2
 
 Visualization
 -------------
@@ -173,6 +179,11 @@ Utilities for preparing DiD data:
 
    diff_diff.generate_did_data
    diff_diff.generate_continuous_did_data
+   diff_diff.generate_staggered_data
+   diff_diff.generate_event_study_data
+   diff_diff.generate_ddd_data
+   diff_diff.generate_factor_data
+   diff_diff.generate_panel_data
    diff_diff.make_treatment_indicator
    diff_diff.make_post_indicator
    diff_diff.wide_to_long
@@ -183,6 +194,23 @@ Utilities for preparing DiD data:
    diff_diff.aggregate_to_cohorts
    diff_diff.rank_control_units
 
+Datasets
+--------
+
+Built-in datasets for examples and testing:
+
+.. autosummary::
+   :toctree: _autosummary
+   :nosignatures:
+
+   diff_diff.load_card_krueger
+   diff_diff.load_castle_doctrine
+   diff_diff.load_divorce_laws
+   diff_diff.load_mpdta
+   diff_diff.load_dataset
+   diff_diff.list_datasets
+   diff_diff.clear_cache
+
 Module Documentation
 --------------------
 
@@ -199,6 +227,8 @@ Detailed documentation by module:
    trop
    continuous_did
    efficient_did
+   two_stage
+   bacon
    results
    visualization
    diagnostics
@@ -207,3 +237,4 @@ Detailed documentation by module:
    pretrends
    utils
    prep
+   datasets
diff --git a/docs/api/power.rst b/docs/api/power.rst
index cc3420a..0e17b75 100644
--- a/docs/api/power.rst
+++ b/docs/api/power.rst
@@ -157,8 +157,8 @@ Complete Example
 
    # Power curve
    pa = PowerAnalysis(n_treated=100, n_control=100, n_pre=4, n_post=4, sigma=1.0)
-   fig = plot_power_curve(pa, effect_range=(0, 1), n_points=50)
-   fig.savefig('power_curve.png')
+   ax = plot_power_curve(pa, effect_range=(0, 1), n_points=50)
+   ax.figure.savefig('power_curve.png')
 
 See Also
 --------
diff --git a/docs/api/prep.rst b/docs/api/prep.rst
index 5a4fcd3..462e82d 100644
--- a/docs/api/prep.rst
+++ b/docs/api/prep.rst
@@ -27,7 +27,7 @@ Example
        n_units=100,
        n_periods=10,
        treatment_effect=5.0,
-       treatment_start=5,
+       treatment_period=5,
        treatment_fraction=0.5,
        noise_sd=1.0
    )
@@ -35,6 +35,62 @@ Example
    print(data.head())
    # Columns: unit_id, period, outcome, treated, post
 
+generate_staggered_data
+~~~~~~~~~~~~~~~~~~~~~~~
+
+Generate synthetic staggered adoption data for testing.
+
+.. autofunction:: diff_diff.generate_staggered_data
+
+Example
+^^^^^^^
+
+.. code-block:: python
+
+   from diff_diff import generate_staggered_data
+
+   data = generate_staggered_data(
+       n_units=200,
+       n_periods=10,
+       cohort_periods=[4, 6, 8],
+       seed=42
+   )
+
+generate_event_study_data
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Generate synthetic event study data for testing.
+
+.. autofunction:: diff_diff.generate_event_study_data
+
+generate_ddd_data
+~~~~~~~~~~~~~~~~~
+
+Generate synthetic Triple Difference data.
+
+.. autofunction:: diff_diff.generate_ddd_data
+
+generate_factor_data
+~~~~~~~~~~~~~~~~~~~~
+
+Generate synthetic data with factor structure for TROP testing.
+
+.. autofunction:: diff_diff.generate_factor_data
+
+generate_panel_data
+~~~~~~~~~~~~~~~~~~~
+
+Generate generic synthetic panel data.
+
+.. autofunction:: diff_diff.generate_panel_data
+
+generate_continuous_did_data
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Generate synthetic continuous treatment DiD data with known dose-response.
+
+.. autofunction:: diff_diff.generate_continuous_did_data
+
 Indicator Creation
 ------------------
 
@@ -53,17 +109,18 @@ Example
    from diff_diff import make_treatment_indicator
 
    # From categorical
-   data['treated'] = make_treatment_indicator(
+   data = make_treatment_indicator(
        data,
        column='group',
-       treated_value='treatment'
+       treated_values='treatment'
    )
 
    # From numeric threshold
-   data['high_exposure'] = make_treatment_indicator(
+   data = make_treatment_indicator(
        data,
        column='exposure',
-       threshold=0.5
+       threshold=0.5,
+       new_column='high_exposure'
    )
 
 make_post_indicator
@@ -80,7 +137,7 @@ Example
 
    from diff_diff import make_post_indicator
 
-   data['post'] = make_post_indicator(
+   data = make_post_indicator(
        data,
        time_column='period',
        treatment_start=5
@@ -129,17 +186,17 @@ Example
    # Fill missing periods with NaN
    balanced = balance_panel(
        data,
-       unit='unit_id',
-       time='period',
+       unit_column='unit_id',
+       time_column='period',
        method='fill'
    )
 
-   # Or drop units with missing periods
+   # Or keep only units with all periods (default)
    balanced = balance_panel(
        data,
-       unit='unit_id',
-       time='period',
-       method='drop'
+       unit_column='unit_id',
+       time_column='period',
+       method='inner'
    )
 
 Staggered Adoption Utilities
@@ -159,13 +216,13 @@ Example
 
    from diff_diff import create_event_time
 
-   data['event_time'] = create_event_time(
+   data = create_event_time(
        data,
-       time_col='period',
-       first_treat_col='first_treatment'
+       time_column='period',
+       treatment_time_column='first_treat'
    )
 
-   # event_time = period - first_treatment
+   # event_time = period - first_treat
    # Negative values: pre-treatment
    # Zero: treatment period
    # Positive values: post-treatment
@@ -187,10 +244,10 @@ Example
 
    cohort_data = aggregate_to_cohorts(
        data,
-       outcome='outcome',
-       time='period',
-       cohort='first_treatment',
-       agg_func='mean'
+       unit_column='unit_id',
+       time_column='period',
+       treatment_column='first_treat',
+       outcome='outcome'
    )
 
 Data Validation
@@ -210,18 +267,19 @@ Example
 
    from diff_diff import validate_did_data
 
-   is_valid, issues = validate_did_data(
+   result = validate_did_data(
        data,
        outcome='outcome',
-       treated='treated',
-       post='post',
-       unit='unit_id',
-       time='period'
+       treatment='treated',
+       time='period',
+       unit='unit_id'
    )
 
-   if not is_valid:
-       for issue in issues:
-           print(f"Issue: {issue}")
+   if not result['valid']:
+       for error in result['errors']:
+           print(f"Error: {error}")
+       for warning in result['warnings']:
+           print(f"Warning: {warning}")
 
 summarize_did_data
 ~~~~~~~~~~~~~~~~~~
@@ -240,15 +298,12 @@ Example
    summary = summarize_did_data(
        data,
        outcome='outcome',
-       treated='treated',
-       post='post',
-       unit='unit_id',
-       time='period'
+       treatment='treated',
+       time='period',
+       unit='unit_id'
    )
 
-   print(f"N units: {summary['n_units']}")
-   print(f"N periods: {summary['n_periods']}")
-   print(f"Treatment fraction: {summary['treatment_fraction']:.1%}")
+   print(summary)
 
 Control Unit Selection
 ----------------------
@@ -265,17 +320,17 @@ Example
 
 .. code-block:: python
 
-   from diff_diff import rank_control_units
+   from diff_diff import rank_control_units, generate_did_data
 
+   panel = generate_did_data(n_units=100, n_periods=10, treatment_effect=2.0)
    ranked = rank_control_units(
-       data,
-       outcome='outcome',
-       unit='unit_id',
-       time='period',
-       treated='treated',
-       pre_periods=4,
-       method='correlation'  # or 'rmse'
+       panel,
+       unit_column='unit',
+       time_column='period',
+       outcome_column='outcome',
+       treatment_column='treated',
+       pre_periods=[0, 1, 2, 3, 4]
    )
 
    # Select top 10 control units
-   best_controls = ranked.head(10)['unit_id'].tolist()
+   best_controls = ranked.head(10)['unit'].tolist()
diff --git a/docs/api/pretrends.rst b/docs/api/pretrends.rst
index dddb44d..0d81dc6 100644
--- a/docs/api/pretrends.rst
+++ b/docs/api/pretrends.rst
@@ -48,9 +48,10 @@ Example
    from diff_diff import MultiPeriodDiD, PreTrendsPower
 
    # First fit an event study
-   model = MultiPeriodDiD(reference_period=-1)
-   results = model.fit(data, outcome='y', treated='treated',
-                       time='period', unit='unit_id', treatment_start=5)
+   model = MultiPeriodDiD()
+   results = model.fit(data, outcome='y', treatment='treated',
+                       time='period', unit='unit_id',
+                       post_periods=[5, 6, 7], reference_period=4)
 
    # Compute pre-trends power for linear violations
    pt = PreTrendsPower(alpha=0.05, power=0.80, violation_type='linear')
@@ -130,12 +131,13 @@ Complete Example
    )
 
    # Fit event study
-   model = MultiPeriodDiD(reference_period=-1)
-   results = model.fit(data, outcome='y', treated='treated',
-                       time='period', unit='unit_id', treatment_start=5)
+   model = MultiPeriodDiD()
+   results = model.fit(data, outcome='y', treatment='treated',
+                       time='period', unit='unit_id',
+                       post_periods=[5, 6, 7], reference_period=4)
 
    # Compute MDV
-   mdv = compute_mdv(results, alpha=0.05, power=0.80)
+   mdv = compute_mdv(results, alpha=0.05, target_power=0.80)
    print(f"Minimum Detectable Violation: {mdv:.3f}")
 
    # Power curve analysis
@@ -143,15 +145,11 @@ Complete Example
    curve = pt.power_curve(results, n_points=50)
 
    # Plot power curve
-   fig = plot_pretrends_power(curve, show_mdv=True, target_power=0.80)
-   fig.savefig('pretrends_power.png')
+   ax = plot_pretrends_power(curve, target_power=0.80)
+   ax.figure.savefig('pretrends_power.png')
 
    # Integration with HonestDiD
-   sensitivity = pt.sensitivity_to_honest_did(
-       results,
-       honest_method='smoothness',
-       M_grid=np.linspace(0, mdv, 21)
-   )
+   sensitivity = pt.sensitivity_to_honest_did(results)
 
 References
 ----------
diff --git a/docs/api/two_stage.rst b/docs/api/two_stage.rst
new file mode 100644
index 0000000..90dc8de
--- /dev/null
+++ b/docs/api/two_stage.rst
@@ -0,0 +1,165 @@
+Two-Stage DiD (Gardner 2022)
+============================
+
+Two-stage residualization estimator for staggered Difference-in-Differences.
+
+This module implements the methodology from Gardner (2022), "Two-stage
+differences in differences". The method:
+
+1. Estimates unit + time fixed effects on untreated observations only (Stage 1)
+2. Residualizes ALL outcomes using the estimated fixed effects
+3. Regresses residualized outcomes on treatment indicators (Stage 2)
+
+Inference uses the GMM sandwich variance estimator from Butts & Gardner
+(2022) that correctly accounts for first-stage estimation uncertainty.
+Point estimates are identical to ImputationDiD (Borusyak et al. 2024);
+the key difference is the variance estimator (GMM sandwich vs. conservative).
+
+**When to use TwoStageDiD:**
+
+- Staggered adoption settings where you want **efficient point estimates**
+  with variance that accounts for first-stage estimation uncertainty
+- When you prefer the GMM sandwich variance over the conservative variance
+  used by ImputationDiD — the sandwich estimator can yield tighter
+  confidence intervals when first-stage uncertainty is small
+- As a robustness check alongside CallawaySantAnna and ImputationDiD:
+  if all estimators agree, results are robust; if they disagree, investigate
+  treatment effect heterogeneity
+- When you need an event study that is **free of TWFE contamination bias**
+
+**Reference:** Gardner, J. (2022). Two-stage differences in differences.
+*arXiv:2207.05943*. Butts, K. & Gardner, J. (2022). did2s: Two-Stage
+Difference-in-Differences. *R Journal*, 14(3), 162-173.
+
+.. module:: diff_diff.two_stage
+
+TwoStageDiD
+------------
+
+Main estimator class for two-stage DiD estimation.
+
+.. autoclass:: diff_diff.TwoStageDiD
+   :members:
+   :undoc-members:
+   :show-inheritance:
+   :inherited-members:
+
+   .. rubric:: Methods
+
+   .. autosummary::
+
+      ~TwoStageDiD.fit
+      ~TwoStageDiD.get_params
+      ~TwoStageDiD.set_params
+
+TwoStageDiDResults
+------------------
+
+Results container for two-stage DiD estimation.
+
+.. autoclass:: diff_diff.TwoStageDiDResults
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+   .. rubric:: Methods
+
+   .. autosummary::
+
+      ~TwoStageDiDResults.summary
+      ~TwoStageDiDResults.print_summary
+      ~TwoStageDiDResults.to_dataframe
+
+TwoStageBootstrapResults
+------------------------
+
+Bootstrap inference results.
+
+.. autoclass:: diff_diff.TwoStageBootstrapResults
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+Convenience Function
+--------------------
+
+.. autofunction:: diff_diff.two_stage_did
+
+Example Usage
+-------------
+
+Basic usage::
+
+    from diff_diff import TwoStageDiD, generate_staggered_data
+
+    data = generate_staggered_data(n_units=200, seed=42)
+    est = TwoStageDiD()
+    results = est.fit(data, outcome='outcome', unit='unit',
+                      time='period', first_treat='first_treat')
+    results.print_summary()
+
+Event study with visualization::
+
+    from diff_diff import TwoStageDiD, plot_event_study
+
+    est = TwoStageDiD()
+    results = est.fit(data, outcome='outcome', unit='unit',
+                      time='period', first_treat='first_treat',
+                      aggregate='event_study')
+    plot_event_study(results)
+
+Comparison with other estimators::
+
+    from diff_diff import TwoStageDiD, CallawaySantAnna, ImputationDiD
+
+    # All three should agree under homogeneous effects
+    ts = TwoStageDiD().fit(data, outcome='outcome', unit='unit',
+                           time='period', first_treat='first_treat')
+    cs = CallawaySantAnna().fit(data, outcome='outcome', unit='unit',
+                                time='period', first_treat='first_treat')
+    imp = ImputationDiD().fit(data, outcome='outcome', unit='unit',
+                              time='period', first_treat='first_treat')
+
+    print(f"Two-Stage ATT: {ts.overall_att:.3f} (SE: {ts.overall_se:.3f})")
+    print(f"CS ATT:        {cs.overall_att:.3f} (SE: {cs.overall_se:.3f})")
+    print(f"Imputation ATT:{imp.overall_att:.3f} (SE: {imp.overall_se:.3f})")
+
+Estimator Comparison
+--------------------
+
+.. list-table:: TwoStageDiD vs. CallawaySantAnna vs. ImputationDiD
+   :header-rows: 1
+   :widths: 25 25 25 25
+
+   * - Feature
+     - TwoStageDiD
+     - CallawaySantAnna
+     - ImputationDiD
+   * - Point estimates
+     - Identical to ImputationDiD
+     - Group-time ATT(g,t)
+     - Identical to TwoStageDiD
+   * - Variance estimator
+     - GMM sandwich (accounts for first-stage uncertainty)
+     - Analytical IF/WIF or multiplier bootstrap
+     - Conservative (Theorem 3)
+   * - Control group
+     - Never-treated + not-yet-treated
+     - Never-treated or not-yet-treated
+     - Never-treated + not-yet-treated
+   * - Efficiency
+     - High (uses all untreated obs)
+     - Lower (2x2 comparisons)
+     - High (uses all untreated obs)
+   * - Heterogeneous effects
+     - Consistent under homogeneity
+     - Robust to heterogeneity
+     - Consistent under homogeneity
+   * - Covariates
+     - Supported
+     - Supported (outcome regression or IPW)
+     - Supported
+   * - Bootstrap
+     - Multiplier bootstrap on GMM influence function
+     - Multiplier bootstrap (IF/WIF)
+     - Multiplier bootstrap
diff --git a/docs/api/utils.rst b/docs/api/utils.rst
index 1c5a53f..bcf0171 100644
--- a/docs/api/utils.rst
+++ b/docs/api/utils.rst
@@ -26,13 +26,12 @@ Example
    result = check_parallel_trends(
        data,
        outcome='y',
-       unit='unit_id',
        time='period',
-       treated='treated',
-       pre_periods=4
+       treatment_group='treated',
+       pre_periods=[0, 1, 2, 3]
    )
 
-   print(f"F-statistic: {result['f_stat']:.3f}")
+   print(f"t-statistic: {result['t_statistic']:.3f}")
    print(f"p-value: {result['p_value']:.3f}")
 
    if result['p_value'] > 0.05:
@@ -65,10 +64,9 @@ Example
    result = equivalence_test_trends(
        data,
        outcome='y',
-       unit='unit_id',
        time='period',
-       treated='treated',
-       equivalence_bound=0.5  # Effect size bound
+       treatment_group='treated',
+       equivalence_margin=0.5  # Effect size bound
    )
 
    if result['equivalent']:
@@ -89,25 +87,24 @@ Example
 
 .. code-block:: python
 
-   from diff_diff import DifferenceInDifferences, wild_bootstrap_se
+   from diff_diff import DifferenceInDifferences, generate_did_data
 
-   # Fit model
-   did = DifferenceInDifferences()
-   results = did.fit(data, outcome='y', treated='treated', post='post')
+   panel = generate_did_data(n_units=200, n_periods=10, treatment_effect=2.0)
 
-   # Bootstrap standard errors
-   boot_results = wild_bootstrap_se(
-       data,
-       outcome='y',
-       treated='treated',
-       post='post',
-       cluster='unit_id',
-       n_bootstrap=999,
-       weight_type='rademacher'
-   )
+   # Use wild bootstrap via the estimator's inference parameter (recommended)
+   did = DifferenceInDifferences(inference='wild_bootstrap', n_bootstrap=999,
+                                  cluster='unit')
+   results = did.fit(panel, outcome='outcome', treatment='treated',
+                     time='post')
+
+   print(f"Bootstrap SE: {results.se:.3f}")
+   print(f"Bootstrap 95% CI: [{results.conf_int[0]:.3f}, {results.conf_int[1]:.3f}]")
+
+.. note::
 
-   print(f"Bootstrap SE: {boot_results.se:.3f}")
-   print(f"Bootstrap 95% CI: [{boot_results.ci[0]:.3f}, {boot_results.ci[1]:.3f}]")
+   ``wild_bootstrap_se()`` is a low-level function that operates on numpy arrays
+   (X, y, residuals, cluster_ids). For most users, the estimator-level
+   ``inference='wild_bootstrap'`` parameter shown above is more convenient.
 
 WildBootstrapResults
 ~~~~~~~~~~~~~~~~~~~~
@@ -130,10 +127,11 @@ The wild bootstrap supports several weight distributions:
 
 .. code-block:: python
 
-   # Using different weight types
-   boot_rad = wild_bootstrap_se(data, ..., weight_type='rademacher')
-   boot_webb = wild_bootstrap_se(data, ..., weight_type='webb')
-   boot_mammen = wild_bootstrap_se(data, ..., weight_type='mammen')
+   # Using different weight types (low-level array API)
+   # wild_bootstrap_se(X, y, residuals, cluster_ids, coefficient_index, ...)
+   boot_rad = wild_bootstrap_se(X, y, resid, clusters, 0, weight_type='rademacher')
+   boot_webb = wild_bootstrap_se(X, y, resid, clusters, 0, weight_type='webb')
+   boot_mammen = wild_bootstrap_se(X, y, resid, clusters, 0, weight_type='mammen')
 
 Recommendation
 ^^^^^^^^^^^^^^
diff --git a/docs/api/visualization.rst b/docs/api/visualization.rst
index 81b2829..a04d6b8 100644
--- a/docs/api/visualization.rst
+++ b/docs/api/visualization.rst
@@ -20,13 +20,13 @@ Example
    from diff_diff import MultiPeriodDiD, plot_event_study
 
    # Fit event study model
-   model = MultiPeriodDiD(reference_period=-1)
-   results = model.fit(data, outcome='y', treated='treated',
-                       time='period', unit='unit_id', treatment_start=5)
+   model = MultiPeriodDiD()
+   results = model.fit(data, outcome='y', treatment='treated',
+                       time='period', unit='unit_id', reference_period=2)
 
    # Create plot
-   fig = plot_event_study(results)
-   fig.savefig('event_study.png', dpi=300, bbox_inches='tight')
+   ax = plot_event_study(results)
+   ax.figure.savefig('event_study.png', dpi=300, bbox_inches='tight')
 
 plot_group_effects
 ------------------
@@ -47,7 +47,7 @@ Example
                     time='period', first_treat='first_treat')
 
    # Plot effects by treatment cohort
-   fig = plot_group_effects(results)
+   ax = plot_group_effects(results)
 
 plot_sensitivity
 ----------------
@@ -61,15 +61,15 @@ Example
 
 .. code-block:: python
 
-   from diff_diff import HonestDiD, DeltaRM, plot_sensitivity
+   from diff_diff import HonestDiD, plot_sensitivity
 
-   honest = HonestDiD(delta=DeltaRM(M_bar=1.0))
+   honest = HonestDiD(method='relative_magnitude', M=1.0)
    sensitivity = honest.sensitivity_analysis(
        results,
        M_grid=[0, 0.5, 1.0, 1.5, 2.0]
    )
 
-   fig = plot_sensitivity(sensitivity)
+   ax = plot_sensitivity(sensitivity)
 
 plot_honest_event_study
 -----------------------
@@ -83,9 +83,9 @@ Example
 
 .. code-block:: python
 
-   from diff_diff import HonestDiD, DeltaRM, plot_honest_event_study
+   from diff_diff import HonestDiD, plot_honest_event_study
 
-   honest = HonestDiD(delta=DeltaRM(M_bar=1.0))
+   honest = HonestDiD(method='relative_magnitude', M=1.0)
    bounds = honest.fit(event_study_results)
 
-   fig = plot_honest_event_study(event_study_results, bounds)
+   ax = plot_honest_event_study(bounds)
diff --git a/docs/choosing_estimator.rst b/docs/choosing_estimator.rst
index 7b5af2f..3670f9a 100644
--- a/docs/choosing_estimator.rst
+++ b/docs/choosing_estimator.rst
@@ -17,6 +17,8 @@ Start here and follow the questions:
 
    - **No** → Go to question 2
    - **Yes** → Use :class:`~diff_diff.CallawaySantAnna` (or :class:`~diff_diff.EfficientDiD` for tighter SEs under PT-All)
+   - **Yes, and you suspect homogeneous effects** → Use :class:`~diff_diff.ImputationDiD` or :class:`~diff_diff.TwoStageDiD` for tighter CIs
+   - **Want to diagnose TWFE bias?** → Use :class:`~diff_diff.BaconDecomposition` first
 
 2. **Do you have panel data?** (Multiple observations per unit over time)
 
@@ -71,6 +73,30 @@ Quick Reference
      - Continuous dose / treatment intensity
      - Strong Parallel Trends (SPT) for dose-response; PT for binarized ATT
      - ATT\ :sup:`loc` (PT); ATT(d), ACRT(d) (SPT)
+   * - ``SunAbraham``
+     - Staggered adoption, interaction-weighted
+     - Conditional parallel trends
+     - Cohort-specific ATTs, event study
+   * - ``ImputationDiD``
+     - Staggered, homogeneous effects
+     - Unit + time FE structure
+     - Imputed treatment effects, event study
+   * - ``TwoStageDiD``
+     - Staggered adoption, efficient
+     - Unit + time FE structure
+     - Single ATT or event study
+   * - ``StackedDiD``
+     - Staggered, sub-experiment approach
+     - Parallel trends per cohort
+     - Trimmed aggregate ATT
+   * - ``TROP``
+     - Factor confounding suspected
+     - Factor model + weights
+     - ATT with triple robustness
+   * - ``BaconDecomposition``
+     - TWFE diagnostic
+     - (diagnostic tool)
+     - 2x2 decomposition weights
 
 Detailed Guidance
 -----------------
@@ -89,7 +115,7 @@ Use :class:`~diff_diff.DifferenceInDifferences` when:
    from diff_diff import DifferenceInDifferences
 
    did = DifferenceInDifferences()
-   results = did.fit(data, outcome='y', treated='treated', post='post')
+   results = did.fit(data, outcome='y', treatment='treated', time='post')
 
 Two-Way Fixed Effects
 ~~~~~~~~~~~~~~~~~~~~~
@@ -112,7 +138,7 @@ Use :class:`~diff_diff.TwoWayFixedEffects` when:
    from diff_diff import TwoWayFixedEffects
 
    twfe = TwoWayFixedEffects()
-   results = twfe.fit(data, outcome='y', treated='treated',
+   results = twfe.fit(data, outcome='y', treatment='treated',
                       unit='unit_id', time='period')
 
 Multi-Period Event Study
@@ -129,9 +155,9 @@ Use :class:`~diff_diff.MultiPeriodDiD` when:
 
    from diff_diff import MultiPeriodDiD, plot_event_study
 
-   event = MultiPeriodDiD(reference_period=-1)
-   results = event.fit(data, outcome='y', treated='treated',
-                       time='period', unit='unit_id', treatment_start=5)
+   event = MultiPeriodDiD()
+   results = event.fit(data, outcome='y', treatment='treated',
+                       time='period', unit='unit_id', reference_period=2)
 
    # Visualize
    plot_event_study(results)
@@ -159,11 +185,14 @@ This is the recommended estimator for most applied work with staggered adoption.
                     time='period', first_treat='first_treat',
                     covariates=['x1', 'x2'])
 
-   # Get aggregated effects
-   print(f"Overall ATT: {results.att:.3f}")
+   # Overall ATT
+   print(f"Overall ATT: {results.overall_att:.3f}")
 
    # Event study aggregation
-   event_study = results.aggregate('event_time')
+   es = cs.fit(data, outcome='y', unit='unit_id',
+               time='period', first_treat='first_treat',
+               covariates=['x1', 'x2'], aggregate='event_study')
+   event_study_df = es.to_dataframe('event_study')
 
 Synthetic DiD
 ~~~~~~~~~~~~~
@@ -176,12 +205,13 @@ Use :class:`~diff_diff.SyntheticDiD` when:
 
 .. code-block:: python
 
-   from diff_diff import SyntheticDiD
+   from diff_diff import SyntheticDiD, generate_did_data
 
+   # SyntheticDiD requires block treatment (constant within units)
+   block_data = generate_did_data(n_units=40, n_periods=10, treatment_effect=2.0)
    sdid = SyntheticDiD()
-   results = sdid.fit(data, outcome='y', unit='unit_id',
-                      time='period', treated='treated',
-                      treatment_start=5)
+   results = sdid.fit(block_data, outcome='outcome', unit='unit',
+                      time='period', treatment='treated')
 
    # View the unit weights
    print(results.unit_weights)
@@ -244,6 +274,168 @@ Use :class:`~diff_diff.EfficientDiD` when:
                       aggregate='all')
    results.print_summary()
 
+Sun-Abraham
+~~~~~~~~~~~
+
+Use :class:`~diff_diff.SunAbraham` when:
+
+- You have staggered adoption and want an interaction-weighted event study
+- You want to decompose effects by cohort and relative time
+- You need a regression-based complement to Callaway-Sant'Anna
+
+Sun & Abraham (2021) uses a saturated TWFE regression with cohort x relative-time
+interactions, then aggregates cohort-specific effects using interaction weights.
+
+.. code-block:: python
+
+   from diff_diff import SunAbraham
+
+   sa = SunAbraham(control_group='never_treated')
+   results = sa.fit(data, outcome='y', unit='unit_id',
+                    time='period', first_treat='first_treat')
+   results.print_summary()
+
+.. note::
+
+   Running both Sun-Abraham and Callaway-Sant'Anna provides a useful robustness
+   check. Both are consistent under heterogeneous treatment effects.
+
+Imputation DiD
+~~~~~~~~~~~~~~
+
+Use :class:`~diff_diff.ImputationDiD` when:
+
+- You have staggered adoption with homogeneous treatment effects
+- You want shorter confidence intervals than Callaway-Sant'Anna (~50% shorter)
+- You need imputed counterfactual outcomes for treated observations
+
+Borusyak, Jaravel & Spiess (2024) estimate unit + time FE on untreated observations,
+impute counterfactual Y(0) for treated observations, then aggregate.
+
+.. code-block:: python
+
+   from diff_diff import ImputationDiD
+
+   imp = ImputationDiD()
+   results = imp.fit(data, outcome='y', unit='unit_id',
+                     time='period', first_treat='first_treat',
+                     aggregate='event_study')
+   results.print_summary()
+
+.. note::
+
+   Under homogeneous effects, ImputationDiD is semiparametrically efficient.
+   If you suspect heterogeneous effects across cohorts, prefer Callaway-Sant'Anna.
+
+Two-Stage DiD
+~~~~~~~~~~~~~
+
+Use :class:`~diff_diff.TwoStageDiD` when:
+
+- You want the same point estimates as ImputationDiD with a different variance estimator
+- You prefer the GMM sandwich variance that accounts for first-stage uncertainty
+- You want a single ATT or an event study from a two-stage procedure
+
+Gardner (2022) estimates FE on untreated obs (stage 1), residualizes all outcomes,
+then regresses residuals on treatment indicators (stage 2).
+
+.. code-block:: python
+
+   from diff_diff import TwoStageDiD
+
+   ts = TwoStageDiD()
+   results = ts.fit(data, outcome='y', unit='unit_id',
+                    time='period', first_treat='first_treat',
+                    aggregate='event_study')
+   results.print_summary()
+
+.. note::
+
+   Point estimates are identical to ImputationDiD; the key difference is the
+   variance estimator (GMM sandwich vs. conservative clustered).
+
+Stacked DiD
+~~~~~~~~~~~
+
+Use :class:`~diff_diff.StackedDiD` when:
+
+- You have staggered adoption and want a sub-experiment approach
+- You want to avoid forbidden comparisons in TWFE by construction
+- You need corrective Q-weights for unbiased stacked estimation
+
+Wing, Freedman & Hollingsworth (2024) create one sub-experiment per adoption cohort
+with clean controls and apply Q-weights to reweight the stacked regression.
+
+.. code-block:: python
+
+   from diff_diff import StackedDiD
+
+   stk = StackedDiD(kappa_pre=2, kappa_post=3)
+   results = stk.fit(data, outcome='y', unit='unit_id',
+                     time='period', first_treat='first_treat',
+                     aggregate='event_study')
+   results.print_summary()
+
+.. note::
+
+   The trimmed aggregate ATT may exclude early or late cohorts whose event
+   windows do not fit in the data. Check ``results.trimmed_groups``.
+
+TROP
+~~~~
+
+Use :class:`~diff_diff.TROP` when:
+
+- You suspect interactive fixed effects (factor confounding)
+- Standard parallel trends may not hold due to unobserved factors
+- You want triple robustness: factor model + unit weights + time weights
+
+Athey, Imbens, Qu & Viviano (2025) combine nuclear norm regularization,
+exponential unit distance weights, and time decay weights with LOOCV tuning.
+
+.. code-block:: python
+
+   from diff_diff import TROP
+
+   trop = TROP(n_bootstrap=200)
+   results = trop.fit(data, outcome='y', treatment='treated',
+                      unit='unit_id', time='period')
+   results.print_summary()
+
+.. note::
+
+   TROP is computationally intensive. Use ``method='global'`` for faster
+   estimation at the cost of some flexibility vs. ``method='twostep'``.
+
+Bacon Decomposition
+~~~~~~~~~~~~~~~~~~~
+
+Use :class:`~diff_diff.BaconDecomposition` when:
+
+- You want to **diagnose** whether TWFE is biased in your staggered setting
+- You need to see which 2x2 comparisons drive the TWFE estimate
+- You want to check whether later-vs-earlier or already-treated-as-control comparisons carry substantial weight
+
+Goodman-Bacon (2021) decomposes the TWFE estimate into a weighted average of
+all 2x2 DiD comparisons, showing the weight each comparison receives.
+
+.. code-block:: python
+
+   from diff_diff import BaconDecomposition, plot_bacon
+
+   bacon = BaconDecomposition()
+   results = bacon.fit(data, outcome='y', unit='unit_id',
+                       time='period', first_treat='first_treat')
+   results.print_summary()
+
+   # Visualize the decomposition
+   plot_bacon(results)
+
+.. note::
+
+   This is a diagnostic tool, not an estimator. If the decomposition reveals
+   problematic weights, switch to Callaway-Sant'Anna or another robust estimator.
+
 Common Pitfalls
 ---------------
 
@@ -275,7 +467,7 @@ Common Pitfalls
    Standard errors should typically be clustered at the level of treatment
    assignment (often the unit level).
 
-   *Solution*: Always specify ``cluster_col`` for panel data.
+   *Solution*: Always specify ``cluster`` for panel data.
 
 Standard Error Methods
 ----------------------
@@ -292,22 +484,46 @@ differences helps interpret results and choose appropriate inference.
      - Details
    * - ``DifferenceInDifferences``
      - HC1 (heteroskedasticity-robust)
-     - Uses White's robust SEs by default. Specify ``cluster_col`` for cluster-robust SEs. Use ``inference='wild_bootstrap'`` for few clusters (<30).
+     - Uses White's robust SEs by default. Specify ``cluster`` for cluster-robust SEs. Use ``inference='wild_bootstrap'`` for few clusters (<30).
    * - ``TwoWayFixedEffects``
      - Cluster-robust (unit level)
-     - Always clusters at unit level after within-transformation. Specify ``cluster_col`` to override. Use ``inference='wild_bootstrap'`` for few clusters.
+     - Clusters at unit level by default after within-transformation. Specify ``cluster`` to override. Use ``inference='wild_bootstrap'`` for few clusters.
    * - ``MultiPeriodDiD``
      - HC1 (heteroskedasticity-robust)
-     - Same as basic DiD. Cluster-robust available via ``cluster_col``. Wild bootstrap not yet supported for multi-coefficient inference.
+     - Same as basic DiD. Cluster-robust available via ``cluster``. Wild bootstrap not yet supported for multi-coefficient inference.
    * - ``CallawaySantAnna``
-     - Analytical (simple difference)
-     - Uses simple variance of group-time means. Use ``bootstrap()`` method for multiplier bootstrap inference with proper SEs, CIs, and p-values.
+     - Analytical (influence function)
+     - Uses influence-function SEs with WIF adjustment by default. Set ``n_bootstrap=999`` for multiplier bootstrap inference (weight types: ``rademacher``, ``mammen``, ``webb``).
    * - ``SyntheticDiD``
-     - Bootstrap or placebo-based
-     - Default uses bootstrap resampling. Set ``n_bootstrap=0`` for placebo-based inference using pre-treatment residuals.
+     - Placebo or bootstrap
+     - Default uses placebo-based variance (``variance_method="placebo"``). Set ``variance_method="bootstrap"`` for bootstrap inference. Both methods use ``n_bootstrap`` replications (default 200).
    * - ``ContinuousDiD``
      - Analytical (influence function)
      - Uses influence-function-based SEs by default. Use ``n_bootstrap=199`` (or higher) for multiplier bootstrap inference with proper CIs.
+   * - ``SunAbraham``
+     - Cluster-robust (unit level)
+     - Clusters at unit level by default. Specify ``cluster`` to override. Use ``n_bootstrap`` for pairs bootstrap inference.
+   * - ``ImputationDiD``
+     - Conservative clustered (Theorem 3)
+     - Uses conservative clustered variance from Borusyak et al. Theorem 3, clustered at unit level. Use ``n_bootstrap`` for multiplier bootstrap.
+   * - ``TwoStageDiD``
+     - GMM sandwich (clustered)
+     - Uses GMM sandwich variance accounting for first-stage estimation uncertainty, clustered at unit level. Use ``n_bootstrap`` for multiplier bootstrap.
+   * - ``StackedDiD``
+     - Cluster-robust (unit level)
+     - Clusters at unit level by default. Set ``cluster='unit_subexp'`` for (unit, sub-experiment) clustering.
+   * - ``TripleDifference``
+     - Influence function (robust)
+     - Uses influence-function-based SEs (inherently heteroskedasticity-robust). Specify ``cluster`` for cluster-robust SEs.
+   * - ``TROP``
+     - Bootstrap (n_bootstrap=200)
+     - Uses unit-level block bootstrap for variance estimation. Bootstrap is always required (minimum n_bootstrap=2).
+   * - ``EfficientDiD``
+     - Analytical (EIF-based)
+     - Uses efficient influence function SE = sqrt(mean(EIF^2) / n). Use ``n_bootstrap`` for multiplier bootstrap.
+   * - ``BaconDecomposition``
+     - N/A (diagnostic)
+     - Diagnostic tool only; does not produce standard errors.
 
 **Recommendations by sample size:**
 
@@ -321,15 +537,19 @@ For panel data, always cluster at the unit level unless you have a strong reason
 
 .. code-block:: python
 
+   from diff_diff import DifferenceInDifferences, generate_did_data
+
+   panel = generate_did_data(n_units=200, n_periods=10, treatment_effect=2.0)
+
    # Good: Cluster at unit level for panel data
-   did = DifferenceInDifferences()
-   results = did.fit(data, outcome='y', treated='treated',
-                     post='post', cluster_col='unit_id')
+   did = DifferenceInDifferences(cluster='unit')
+   results = did.fit(panel, outcome='outcome', treatment='treated',
+                     time='post')
 
    # Better for few clusters: Wild bootstrap
-   did = DifferenceInDifferences(inference='wild_bootstrap')
-   results = did.fit(data, outcome='y', treated='treated',
-                     post='post', cluster_col='state')
+   did = DifferenceInDifferences(inference='wild_bootstrap', cluster='unit')
+   results = did.fit(panel, outcome='outcome', treatment='treated',
+                     time='post')
 
 When in Doubt
 -------------
diff --git a/docs/index.rst b/docs/index.rst
index a54646b..236997c 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -10,17 +10,19 @@ It provides sklearn-like estimators with statsmodels-style output for econometri
 
    # Fit a basic DiD model
    did = DifferenceInDifferences()
-   results = did.fit(data, outcome='y', treated='treated', post='post')
+   results = did.fit(data, outcome='y', treatment='treated', time='post')
    print(results.summary())
 
 Key Features
 ------------
 
-- **Multiple Estimators**: Basic DiD, Two-Way Fixed Effects, Multi-Period Event Studies, Synthetic DiD, and Callaway-Sant'Anna for staggered adoption
-- **Modern Inference**: Robust standard errors, cluster-robust SEs, and wild cluster bootstrap
-- **Assumption Testing**: Parallel trends tests, placebo tests, and comprehensive diagnostics
+- **13+ Estimators**: Basic DiD, TWFE, Event Study, Synthetic DiD, plus modern staggered estimators (Callaway-Sant'Anna, Sun-Abraham, Imputation, Two-Stage, Stacked DiD), advanced methods (TROP, Continuous DiD, Efficient DiD, Triple Difference), and Bacon Decomposition diagnostics
+- **Modern Inference**: Robust standard errors, cluster-robust SEs, wild cluster bootstrap, and multiplier bootstrap
+- **Assumption Testing**: Parallel trends tests, placebo tests, Bacon decomposition, and comprehensive diagnostics
 - **Sensitivity Analysis**: Honest DiD (Rambachan & Roth 2023) for robust inference under parallel trends violations
-- **Publication-Ready Output**: Summary tables and event study plots
+- **Built-in Datasets**: Real-world datasets from published studies (Card & Krueger, Castle Doctrine, and more)
+- **High Performance**: Optional Rust backend for compute-intensive estimators like Synthetic DiD and TROP
+- **Publication-Ready Output**: Summary tables, event study plots, and sensitivity analysis figures
 
 Installation
 ------------
@@ -64,16 +66,6 @@ Quick Links
    :hidden:
 
    api/index
-   api/estimators
-   api/staggered
-   api/results
-   api/visualization
-   api/diagnostics
-   api/honest_did
-   api/power
-   api/pretrends
-   api/utils
-   api/prep
 
 Indices and tables
 ------------------
diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md
index 500fc37..44219e2 100644
--- a/docs/methodology/REGISTRY.md
+++ b/docs/methodology/REGISTRY.md
@@ -1500,7 +1500,7 @@ For global method, LOOCV works as follows:
 *Assumption checks / warnings:*
 - Requires variation in treatment timing (staggered adoption)
 - Warns if only one treatment cohort (decomposition not meaningful)
-- Assumes no never-treated: uses not-yet-treated as controls
+- Uses never-treated units as controls when present; falls back to timing-only comparisons otherwise
 
 *Estimator equation (as implemented):*
 
diff --git a/docs/python_comparison.rst b/docs/python_comparison.rst
index d2bbc99..c65fdfa 100644
--- a/docs/python_comparison.rst
+++ b/docs/python_comparison.rst
@@ -36,7 +36,7 @@ Overview
      - ❌
      - ❌
    * - Sun-Abraham
-     - ❌
+     - ✅
      - ✅
      - ❌
      - ❌
@@ -197,13 +197,13 @@ Feature Comparison Table
      - ❌
      - ❌
    * - Sun-Abraham estimator
-     - ❌
+     - ✅
      - ✅
      - ❌
      - ❌
      - ❌
    * - Gardner's did2s
-     - ❌
+     - ✅
      - ✅
      - ❌
      - ❌
@@ -310,6 +310,48 @@ Feature Comparison Table
      - ✅
      - ✅
      - ❌
+   * - Triple Difference (DDD)
+     - ✅
+     - ❌
+     - ✅
+     - ❌
+     - ❌
+   * - TROP
+     - ✅
+     - ❌
+     - ❌
+     - ❌
+     - ❌
+   * - Stacked DiD
+     - ✅
+     - ❌
+     - ❌
+     - ❌
+     - ❌
+   * - Bacon Decomposition
+     - ✅
+     - ❌
+     - ❌
+     - ❌
+     - ❌
+   * - Continuous DiD
+     - ✅
+     - ❌
+     - ❌
+     - ❌
+     - ❌
+   * - Efficient DiD
+     - ✅
+     - ❌
+     - ❌
+     - ❌
+     - ❌
+   * - Built-in datasets
+     - ✅
+     - ❌
+     - ❌
+     - ❌
+     - ❌
    * - Bayesian inference
      - ❌
      - ❌
@@ -329,7 +371,7 @@ Basic DiD
    from diff_diff import DifferenceInDifferences
 
    did = DifferenceInDifferences()
-   results = did.fit(data, outcome='y', treated='treated', post='post')
+   results = did.fit(data, outcome='y', treatment='treated', time='post')
    print(results.summary())
 
 .. code-block:: python
@@ -366,9 +408,10 @@ Staggered DiD (Callaway-Sant'Anna)
        unit='unit',
        time='time',
        first_treat='first_treat',
-       covariates=['x1', 'x2']
+       covariates=['x1', 'x2'],
+       aggregate='event_study'
    )
-   event_study = results.aggregate('event_time')
+   event_study = results.event_study_effects
 
 .. code-block:: python
 
@@ -391,9 +434,9 @@ Sensitivity Analysis
 .. code-block:: python
 
    # diff-diff (only Python option)
-   from diff_diff import HonestDiD, DeltaRM, plot_sensitivity
+   from diff_diff import HonestDiD, plot_sensitivity
 
-   honest = HonestDiD(delta=DeltaRM(M_bar=1.0))
+   honest = HonestDiD(method='relative_magnitude', M=1.0)
    results = honest.fit(event_study_results)
 
    # Sensitivity over M grid
diff --git a/docs/quickstart.rst b/docs/quickstart.rst
index 3faef70..7d4f927 100644
--- a/docs/quickstart.rst
+++ b/docs/quickstart.rst
@@ -154,7 +154,7 @@ When treatment is adopted at different times across units:
        outcome='outcome',
        unit='unit_id',
        time='period',
-       first_treat='first_treatment_period'
+       first_treat='first_treat'
    )
 
    # View aggregated treatment effect
diff --git a/docs/r_comparison.rst b/docs/r_comparison.rst
index c0e61df..8096e48 100644
--- a/docs/r_comparison.rst
+++ b/docs/r_comparison.rst
@@ -114,10 +114,12 @@ staggered DiD. Here's how to translate common operations:
 
 .. code-block:: python
 
-   # Python
-   overall_att = results.att  # Simple aggregation
-   event_study = results.aggregate('event_time')  # Dynamic
-   by_group = results.aggregate('group')  # By cohort
+   # Python (unlike R's aggte(), aggregation is requested at fit time)
+   results = cs.fit(data, outcome='Y', time='period', unit='id',
+                    first_treat='G', aggregate='all')
+   overall_att = results.overall_att  # Simple aggregation
+   event_study = results.event_study_effects  # Dynamic
+   by_group = results.group_effects  # By cohort
 
 R ``HonestDiD`` Package → diff-diff
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -141,9 +143,9 @@ The HonestDiD package implements Rambachan & Roth (2023) sensitivity analysis:
 .. code-block:: python
 
    # Python
-   from diff_diff import HonestDiD, DeltaRM
+   from diff_diff import HonestDiD
 
-   honest = HonestDiD(delta=DeltaRM(M_bar=1.0))
+   honest = HonestDiD(method='relative_magnitude', M=1.0)
    results = honest.fit(event_study_results)
 
    # Sensitivity analysis over M grid
@@ -168,9 +170,9 @@ The HonestDiD package implements Rambachan & Roth (2023) sensitivity analysis:
 .. code-block:: python
 
    # Python
-   from diff_diff import HonestDiD, DeltaSD
+   from diff_diff import HonestDiD
 
-   honest = HonestDiD(delta=DeltaSD(M=0.05))
+   honest = HonestDiD(method='smoothness', M=0.05)
    results = honest.fit(event_study_results)
 
 R ``synthdid`` Package → diff-diff
@@ -191,14 +193,20 @@ The synthdid package implements Arkhangelsky et al. (2021):
    # Python
    from diff_diff import SyntheticDiD
 
+   # SyntheticDiD requires a time-invariant ever-treated indicator
+   data['ever_treated'] = data.groupby('unit')['treatment'].transform('max')
+
+   # Derive post-treatment periods from treatment timing
+   post_periods = sorted(data.loc[data['treatment'] == 1, 'time'].unique())
+
    sdid = SyntheticDiD()
    results = sdid.fit(
        data,
        outcome='Y',
        unit='unit',
        time='time',
-       treated='treatment',
-       treatment_start=T0
+       treatment='ever_treated',
+       post_periods=post_periods
    )
 
 Key Differences
@@ -320,6 +328,57 @@ Feature Comparison Table
      - ❌
      - ❌
      - ❌
+   * - Sun-Abraham
+     - ✅
+     - ❌
+     - ❌
+     - ❌
+   * - Imputation DiD
+     - ✅
+     - ❌
+     - ❌
+     - ❌
+   * - Two-Stage DiD (did2s)
+     - ✅
+     - ❌
+     - ❌
+     - ❌
+   * - Stacked DiD
+     - ✅
+     - ❌
+     - ❌
+     - ❌
+   * - Continuous DiD
+     - ✅
+     - ✅
+     - ❌
+     - ❌
+   * - Triple Difference (DDD)
+     - ✅
+     - ❌
+     - ❌
+     - ❌
+   * - TROP
+     - ✅
+     - ❌
+     - ❌
+     - ❌
+   * - Efficient DiD
+     - ✅
+     - ❌
+     - ❌
+     - ❌
+
+.. note::
+
+   R equivalents for estimators not covered by the ``did``, ``HonestDiD``, or
+   ``synthdid`` packages: Sun-Abraham is available via ``fixest::sunab()``;
+   Imputation DiD via the ``didimputation`` package; Two-Stage DiD via the
+   ``did2s`` package; Bacon Decomposition via the ``bacondecomp`` package;
+   Stacked DiD requires manual implementation or the ``stackedev`` package;
+   Continuous DiD is available via the ``did`` package continuous extension;
+   Triple Difference requires manual implementation in R.
+   TROP and Efficient DiD have no direct R equivalents.
 
 Migration Tips
 --------------
diff --git a/docs/troubleshooting.rst b/docs/troubleshooting.rst
index d8aff49..3fa925a 100644
--- a/docs/troubleshooting.rst
+++ b/docs/troubleshooting.rst
@@ -29,7 +29,7 @@ Data Issues
 
    # Or use make_treatment_indicator
    from diff_diff import make_treatment_indicator
-   data['treated'] = make_treatment_indicator(data, 'group', treated_value='treatment')
+   data = make_treatment_indicator(data, 'group', treated_values='treatment')
 
 "Panel is unbalanced"
 ~~~~~~~~~~~~~~~~~~~~~
@@ -48,13 +48,13 @@ Data Issues
    from diff_diff import balance_panel
 
    # Balance the panel (keeps only units with all periods)
-   balanced = balance_panel(data, unit='unit_id', time='period')
+   balanced = balance_panel(data, unit_column='unit_id', time_column='period')
    print(f"Dropped {len(data) - len(balanced)} observations")
 
    # Alternative: check balance first
    from diff_diff import validate_did_data
-   issues = validate_did_data(data, outcome='y', treated='treated',
-                               unit='unit_id', time='period')
+   issues = validate_did_data(data, outcome='y', treatment='treated',
+                               time='period', unit='unit_id')
    print(issues)
 
 Estimation Errors
@@ -128,19 +128,17 @@ Standard Error Issues
 .. code-block:: python
 
    # For panel data, always cluster at unit level
-   results = did.fit(data, outcome='y', treated='treated',
-                     post='post', cluster_col='unit_id')
+   did = DifferenceInDifferences(cluster='unit_id')
+   results = did.fit(data, outcome='y', treatment='treated', time='post')
 
    # Compare SE methods
    did_robust = DifferenceInDifferences()
-   did_cluster = DifferenceInDifferences()
-   did_wild = DifferenceInDifferences(inference='wild_bootstrap')
+   did_cluster = DifferenceInDifferences(cluster='unit_id')
+   did_wild = DifferenceInDifferences(inference='wild_bootstrap', cluster='unit_id')
 
-   r1 = did_robust.fit(data, outcome='y', treated='treated', post='post')
-   r2 = did_cluster.fit(data, outcome='y', treated='treated',
-                        post='post', cluster_col='unit_id')
-   r3 = did_wild.fit(data, outcome='y', treated='treated',
-                     post='post', cluster_col='unit_id')
+   r1 = did_robust.fit(data, outcome='y', treatment='treated', time='post')
+   r2 = did_cluster.fit(data, outcome='y', treatment='treated', time='post')
+   r3 = did_wild.fit(data, outcome='y', treatment='treated', time='post')
 
    print(f"Robust SE: {r1.se:.4f}")
    print(f"Cluster SE: {r2.se:.4f}")
@@ -207,12 +205,14 @@ Staggered Adoption Issues
    print(data.groupby('first_treat')['unit_id'].nunique())
 
    # Use bootstrap for better inference
-   results = cs.fit(data, ...)
-   bootstrap_results = results.bootstrap(n_bootstrap=999)
+   cs = CallawaySantAnna(n_bootstrap=999)
+   results = cs.fit(data, outcome='y', unit='unit_id',
+                    time='period', first_treat='first_treat',
+                    aggregate='event_study')
 
-   # Aggregate to get more precise estimates
-   event_study = results.aggregate('event_time')
-   overall_att = results.att  # Aggregated ATT
+   # Access aggregated results
+   print(results.overall_att)  # Overall ATT
+   print(results.event_study_effects)  # Event study effects
 
 Visualization Issues
 --------------------
@@ -234,9 +234,11 @@ Visualization Issues
    # Specify reference period explicitly
    plot_event_study(results, reference_period=-1)
 
-   # For CallawaySantAnna, aggregate first
-   event_study = results.aggregate('event_time')
-   plot_event_study(event_study)
+   # For CallawaySantAnna, fit with aggregate='event_study'
+   results = cs.fit(data, outcome='y', unit='unit_id',
+                    time='period', first_treat='first_treat',
+                    aggregate='event_study')
+   plot_event_study(results)
 
 "Plot doesn't show in Jupyter"
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -250,15 +252,15 @@ Visualization Issues
    import matplotlib.pyplot as plt
 
    # Option 1: Use plt.show()
-   fig = plot_event_study(results)
+   ax = plot_event_study(results)
    plt.show()
 
    # Option 2: Use inline magic (Jupyter)
    %matplotlib inline
 
    # Option 3: Return and display figure
-   fig = plot_event_study(results)
-   fig  # Display in Jupyter
+   ax = plot_event_study(results)
+   ax.figure  # Display the figure in Jupyter
 
 Performance Issues
 ------------------
@@ -278,20 +280,327 @@ Performance Issues
 
 .. code-block:: python
 
-   # Use absorb instead of fixed_effects for high-dimensional FE
+   # TWFE already handles unit + time FE via within-transformation
    twfe = TwoWayFixedEffects()
-   results = twfe.fit(data, outcome='y', treated='treated',
-                      unit='unit_id', time='period',
-                      absorb=['unit_id', 'period'])  # Faster than fixed_effects
+   results = twfe.fit(data, outcome='y', treatment='treated',
+                      unit='unit_id', time='period')
 
    # Reduce bootstrap iterations for initial exploration
    did = DifferenceInDifferences(inference='wild_bootstrap', n_bootstrap=99)
 
    # For CallawaySantAnna, start without bootstrap
    cs = CallawaySantAnna()
-   results = cs.fit(data, ...)
-   # Only bootstrap for final results
-   bootstrap_results = results.bootstrap(n_bootstrap=999)
+   results = cs.fit(data, outcome='y', unit='unit_id',
+                    time='period', first_treat='first_treat')
+   # Use n_bootstrap for final results
+   cs_boot = CallawaySantAnna(n_bootstrap=999)
+   results = cs_boot.fit(data, outcome='y', unit='unit_id',
+                         time='period', first_treat='first_treat')
+
+Rust Backend Issues
+-------------------
+
+"Rust backend is not available"
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+**Problem:** ``ImportError`` when using ``DIFF_DIFF_BACKEND=rust`` or attempting to
+use Rust-accelerated operations.
+
+**Causes:**
+
+1. Rust backend was not compiled during installation
+2. The ``maturin`` build step was skipped or failed
+3. Platform does not have a pre-built wheel available
+
+**Solutions:**
+
+.. code-block:: python
+
+   # Check if Rust backend is available
+   from diff_diff import HAS_RUST_BACKEND
+   print(f"Rust backend available: {HAS_RUST_BACKEND}")
+
+   # Force pure Python mode (no Rust required)
+   import os
+   os.environ['DIFF_DIFF_BACKEND'] = 'python'
+
+.. code-block:: bash
+
+   # Rebuild with Rust backend
+   pip install -e ".[dev]"
+   maturin develop --release
+
+   # On macOS with Apple Accelerate
+   maturin develop --release --features accelerate
+
+TROP Issues
+-----------
+
+"All tuning parameter combinations failed"
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+**Problem:** TROP raises an error that all tuning parameter combinations failed
+during leave-one-out cross-validation (LOOCV).
+
+**Causes:**
+
+1. Insufficient pre-treatment periods (minimum 2; recommend 4+ for stability)
+2. Near-constant outcomes that leave no variation to fit
+3. Data is too sparse for the requested lambda grids
+
+**Solutions:**
+
+.. code-block:: python
+
+   from diff_diff import TROP
+
+   # Widen the lambda grids to give the optimizer more room
+   trop = TROP(
+       lambda_time_grid=[0.0, 0.5, 1.0, 2.0, 5.0],
+       lambda_unit_grid=[0.0, 0.5, 1.0, 2.0, 5.0],
+       lambda_nn_grid=[0.0, 0.1, 1.0, 10.0],
+   )
+
+   # TROP requires at least 2 pre-treatment periods (4+ recommended)
+   pre_periods = data.loc[data['post'] == 0, 'period'].nunique()
+   print(f"Pre-treatment periods: {pre_periods}")  # Must be >= 2; stability improves with >= 4
+
+   # If TROP cannot find valid parameters, try CallawaySantAnna as a fallback
+   from diff_diff import CallawaySantAnna
+   cs = CallawaySantAnna()
+   results = cs.fit(data, outcome='y', unit='unit_id',
+                    time='period', first_treat='first_treat')
+
+"LOOCV fits failed / numerical instability"
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+**Problem:** Partial LOOCV failures during TROP tuning, or warnings about
+numerical instability in cross-validation fits.
+
+**Causes:**
+
+1. Poor data quality (missing values, outliers)
+2. Regularization parameters too small for the data scale
+
+**Solutions:**
+
+.. code-block:: python
+
+   # Check data quality
+   print(data[['y', 'treatment', 'post']].describe())
+   print(f"Missing values:\n{data.isnull().sum()}")
+
+   # Increase regularization to improve numerical stability
+   trop = TROP(
+       lambda_nn_grid=[0.1, 1.0, 10.0, 100.0],  # Larger minimum lambda
+   )
+
+"Few bootstrap iterations succeeded"
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+**Problem:** TROP warns that only N of M bootstrap iterations completed
+successfully, leading to imprecise standard errors.
+
+**Causes:**
+
+1. Small sample sizes cause singular matrices in bootstrap resamples
+2. Complex model specification amplifies resampling instability
+
+**Solutions:**
+
+.. code-block:: python
+
+   # Increase total bootstrap iterations to get enough successes
+   trop = TROP(n_bootstrap=999)
+
+   # Simplify the model to reduce bootstrap failures
+   trop = TROP(method='global', n_bootstrap=999)
+
+Continuous DiD Issues
+---------------------
+
+"Dose appears discrete"
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+**Problem:** ``ContinuousDiD`` warns that the dose variable appears to contain
+only integer or discrete values.
+
+**Causes:**
+
+1. Treatment is truly binary (0/1) and should use standard DiD
+2. Dose variable is coded as integers but represents a continuous measure
+
+**Solutions:**
+
+.. code-block:: python
+
+   # Check dose distribution
+   print(data['dose'].value_counts())
+
+   # If treatment is truly binary, use standard DiD instead
+   from diff_diff import DifferenceInDifferences
+   did = DifferenceInDifferences()
+   results = did.fit(data, outcome='y', treatment='treatment', time='post')
+
+   # If dose is continuous but stored as int, convert
+   data['dose'] = data['dose'].astype(float)
+
+"No post-treatment cells available for aggregation"
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+**Problem:** No (g, t) cells are available after filtering, so aggregation
+cannot produce an ATT estimate.
+
+**Causes:**
+
+1. ``first_treat`` is miscoded (e.g., all zeros or all the same value)
+2. No post-treatment periods exist in the data for treated cohorts
+3. Filtering removed all valid cells
+
+**Solutions:**
+
+.. code-block:: python
+
+   # Check first_treat coding
+   print(data['first_treat'].value_counts())
+
+   # Verify that post-treatment periods exist for treated units
+   treated = data[data['first_treat'] > 0]
+   for g, group in treated.groupby('first_treat'):
+       post_obs = group[group['period'] >= g]
+       print(f"Cohort {g}: {len(post_obs)} post-treatment observations")
+
+Imputation / Two-Stage DiD Issues
+----------------------------------
+
+"Non-constant first_treat values"
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+**Problem:** ``ImputationDiD`` or ``TwoStageDiD`` issues a warning because
+``first_treat`` varies within units. The estimator coerces to a single value
+per unit (using the first observed value) and proceeds, but results may be
+unreliable.
+
+**Causes:**
+
+1. Units switch treatment status back and forth
+2. Data merge errors created inconsistent ``first_treat`` values
+
+**Solutions:**
+
+.. code-block:: python
+
+   # Check for non-constant first_treat within units
+   varying = data.groupby('unit_id')['first_treat'].nunique()
+   bad_units = varying[varying > 1].index
+   print(f"Units with varying first_treat: {len(bad_units)}")
+
+   # Fix: ensure first_treat is constant per unit (absorbing state)
+   first_treat_map = data.groupby('unit_id')['first_treat'].first()
+   data['first_treat'] = data['unit_id'].map(first_treat_map)
+
+"Units treated in all observed periods"
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+**Problem:** All observed periods for some units are post-treatment, so no
+pre-treatment outcomes exist to construct counterfactuals.
+
+**Causes:**
+
+1. Always-treated units entered the panel already treated
+2. Observation window starts after treatment onset for some cohorts
+
+**Solutions:**
+
+.. code-block:: python
+
+   # Identify always-treated units (treated at or before first observed period)
+   # Exclude never-treated (first_treat == 0) which are the control group
+   unit_ft = data.groupby('unit_id')['first_treat'].first()
+   min_period = data['period'].min()
+   always_treated = unit_ft[(unit_ft > 0) & (unit_ft <= min_period)]
+   print(f"Always-treated units: {len(always_treated)}")
+
+   # Drop always-treated units (keep never-treated controls)
+   data = data[~data['unit_id'].isin(always_treated.index)]
+
+"Horizons not identified without never-treated units"
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+**Problem:** Certain event study horizons return NaN because they require
+never-treated units for identification (Proposition 5 in Borusyak et al.).
+
+**Causes:**
+
+1. No never-treated units in the data
+2. Specific long-horizon estimates need a comparison group that spans those periods
+
+**Solutions:**
+
+.. code-block:: python
+
+   # Check for never-treated units
+   never_treated = data.groupby('unit_id')['first_treat'].first()
+   print(f"Never-treated units: {(never_treated == 0).sum()}")
+
+   # Option 1: Include never-treated units in your sample
+   # Option 2: Accept NaN for unidentified horizons
+   results = ImputationDiD().fit(data, outcome='y', unit='unit_id',
+                                time='period', first_treat='first_treat')
+   # NaN horizons are expected when never-treated units are absent
+
+Bacon Decomposition Issues
+--------------------------
+
+"Unbalanced panel detected"
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+**Problem:** ``BaconDecomposition`` issues a warning because the panel is
+unbalanced. Bacon decomposition assumes balanced panels and results may be
+inaccurate with missing observations.
+
+**Causes:**
+
+1. Some units are missing observations for certain time periods
+2. Units entered or exited the panel at different times
+
+**Solutions:**
+
+.. code-block:: python
+
+   from diff_diff import balance_panel, BaconDecomposition
+
+   # Balance the panel first
+   balanced = balance_panel(data, unit_column='unit_id', time_column='period')
+   print(f"Dropped {len(data) - len(balanced)} observations to balance panel")
+
+   # Then run decomposition
+   bacon = BaconDecomposition()
+   results = bacon.fit(balanced, outcome='y', unit='unit_id',
+                       time='period', first_treat='first_treat')
+
+Deprecation Warnings
+--------------------
+
+"method='joint' is deprecated"
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+**Problem:** TROP emits a ``FutureWarning`` that ``method='joint'`` is
+deprecated.
+
+**Causes:**
+
+1. Code still passes the deprecated ``method='joint'`` value (renamed to ``method='global'``)
+
+**Solutions:**
+
+.. code-block:: python
+
+   # Old (deprecated)
+   trop = TROP(method='joint')
+
+   # New (use 'global' instead)
+   trop = TROP(method='global')
 
 Getting Help
 ------------
@@ -310,7 +619,7 @@ If you encounter issues not covered here:
 
    data = generate_did_data(n_units=100, n_periods=10, treatment_effect=2.0)
    did = DifferenceInDifferences()
-   results = did.fit(data, outcome='y', treated='treated', post='post')
+   results = did.fit(data, outcome='outcome', treatment='treated', time='post')
    print(f"True effect: 2.0, Estimated: {results.att:.3f}")
 
 For bugs or feature requests, please open an issue on
diff --git a/pyproject.toml b/pyproject.toml
index 907738d..b040d43 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -46,6 +46,7 @@ dev = [
     "ruff>=0.1.0",
     "mypy>=1.0",
     "maturin>=1.4,<2.0",
+    "matplotlib>=3.5",
 ]
 docs = [
     "sphinx>=6.0",
diff --git a/tests/test_doc_snippets.py b/tests/test_doc_snippets.py
new file mode 100644
index 0000000..68b9ab4
--- /dev/null
+++ b/tests/test_doc_snippets.py
@@ -0,0 +1,386 @@
+"""
+Smoke tests for Python code blocks in RST documentation.
+
+Extracts ``.. code-block:: python`` snippets from RST files and executes them
+in isolated namespaces with synthetic data and mock dataset loaders. Fails on
+all exceptions except NameError (context-dependent snippets) and
+ImportError for known third-party/optional packages (comparison-page
+snippets and optional-dependency guards like matplotlib).
+"""
+
+import re
+import textwrap
+from pathlib import Path
+from typing import List, Optional, Tuple
+
+import numpy as np
+import pandas as pd
+import pytest
+
+# ---------------------------------------------------------------------------
+# RST files to validate (the ones that had review findings + key user-facing)
+# ---------------------------------------------------------------------------
+DOCS_DIR = Path(__file__).resolve().parent.parent / "docs"
+
+RST_FILES = [
+    "choosing_estimator.rst",
+    "troubleshooting.rst",
+    "quickstart.rst",
+    "index.rst",
+    "api/datasets.rst",
+    "api/diagnostics.rst",
+    "api/utils.rst",
+    "api/prep.rst",
+    "api/two_stage.rst",
+    "api/bacon.rst",
+    "api/visualization.rst",
+    "api/honest_did.rst",
+    "api/pretrends.rst",
+    "python_comparison.rst",
+    "r_comparison.rst",
+]
+
+# ---------------------------------------------------------------------------
+# Snippet extraction
+# ---------------------------------------------------------------------------
+_CODE_BLOCK_RE = re.compile(
+    r"^\.\.\s+code-block::\s+python\s*$\n"  # directive at column 0 only (indented ones don't match)
+    r"(?:\s*:\w[^:]*:.*\n)*"  # optional ``:option: value`` lines
+    r"\n"  # blank separator
+    r"((?:[ \t]+\S.*\n|[ \t]*\n)+)",  # group 1: indented body, blank lines included
+    re.MULTILINE,
+)
+
+# RST ``::`` shorthand code blocks (paragraph ending with ``::``, blank line,
+# indented body).  The ``::`` must follow a non-space character, and lines
+# that start with ``..`` (RST directives) are excluded by the lookahead.
+_SHORTHAND_BLOCK_RE = re.compile(
+    r"^(?!\s*\.\.).*\S::\s*$\n"  # non-directive line ending with ::
+    r"\n"  # blank separator
+    r"((?:[ \t]+\S.*\n|[ \t]*\n)+)",  # group 1: indented body, blank lines included
+    re.MULTILINE,
+)
+
+# Heuristics used to drop ``::`` blocks that look like shell or prose, not Python.
+_SHELL_HINTS_RE = re.compile(
+    r"^\s*(\$\s|#!|pip\s+install|maturin\s)", re.MULTILINE
+)
+_PROSE_HINT_RE = re.compile(
+    r"^[A-Z][a-z]+ [a-z]+ [a-z]+", re.MULTILINE  # line opening: Capitalized word + two lowercase words
+)
+
+
+def _extract_snippets(rst_path: Path) -> List[Tuple[int, str]]:
+    """Return ``(block_index, dedented_code)`` pairs found in an RST file.
+
+    Bodies of ``.. code-block:: python`` directives come first, followed by
+    ``::`` shorthand bodies that match neither the shell nor the prose
+    heuristic.  Kept blocks are numbered consecutively from 0 in that order;
+    the file is decoded as UTF-8 regardless of the platform locale.
+    """
+    # Sphinx reads sources as UTF-8 by default; don't inherit the locale codec.
+    text = rst_path.read_text(encoding="utf-8")
+    bodies = [textwrap.dedent(m.group(1)) for m in _CODE_BLOCK_RE.finditer(text)]
+    for m in _SHORTHAND_BLOCK_RE.finditer(text):
+        code = textwrap.dedent(m.group(1))
+        # Shorthand blocks carry no language tag, so filter them heuristically.
+        if not (_SHELL_HINTS_RE.search(code) or _PROSE_HINT_RE.search(code)):
+            bodies.append(code)
+    return list(enumerate(bodies))
+
+
+# ---------------------------------------------------------------------------
+# Skip heuristics
+# ---------------------------------------------------------------------------
+_SKIP_PATTERNS = [
+    r"%matplotlib",  # Jupyter magics
+    r"plt\.show\(\)",  # interactive display
+    r"^\s*fig\s*$",  # bare ``fig`` line (Jupyter display); searched with re.MULTILINE
+    r"maturin\s+develop",  # shell commands in python block
+    r"pip\s+install",  # shell commands in python block
+    r"wild_bootstrap_se\(X,",  # low-level array API pseudo-code
+    r"wide_to_long\(",  # references undefined wide_data variable
+]
+
+# Third-party/optional packages that may be absent from the test
+# environment.  An ImportError is tolerated only when its module name or
+# its message mentions one of these; diff_diff and stdlib imports must work.
+_THIRD_PARTY_MODULES = {"pyfixest", "linearmodels", "differences", "matplotlib"}
+
+
+def _should_skip(code: str) -> Optional[str]:
+    """Give the reason a snippet must not be executed, or None to run it."""
+    for pattern in _SKIP_PATTERNS:
+        if re.search(pattern, code, re.MULTILINE):
+            return f"matches skip pattern: {pattern}"
+    # A block made only of blank lines and ``#`` comments has nothing to run.
+    stripped = (raw.strip() for raw in code.splitlines())
+    if not any(text and not text.startswith("#") for text in stripped):
+        return "no executable statements"
+    return None
+
+
+# ---------------------------------------------------------------------------
+# Build parameterized test cases
+# ---------------------------------------------------------------------------
+def _collect_cases() -> List[Tuple[str, str, Optional[str]]]:
+    """Build the (test_id, code, skip_reason) triples fed to parametrize."""
+    collected = []
+    for rel in RST_FILES:
+        source = DOCS_DIR / rel
+        if not source.exists():
+            continue
+        stem = rel.replace("/", "_").removesuffix(".rst")
+        collected.extend(
+            (f"{stem}:block{num}", body, _should_skip(body))
+            for num, body in _extract_snippets(source)
+        )
+    return collected
+
+
+_CASES = _collect_cases()
+
+# ---------------------------------------------------------------------------
+# Shared namespace builder
+# ---------------------------------------------------------------------------
+def _build_namespace() -> dict:
+    """
+    Build an exec namespace with diff_diff names, data, and mock loaders.
+
+    Binds ``data`` and ``df`` to one synthetic staggered panel, ``balanced``
+    to a copy of it, and replaces ``diff_diff.datasets`` in ``sys.modules``.
+    """
+    import diff_diff
+
+    ns: dict = {"__builtins__": __builtins__}
+
+    # Make all public diff_diff names available
+    for name in dir(diff_diff):
+        if not name.startswith("_"):
+            ns[name] = getattr(diff_diff, name)
+
+    ns["diff_diff"] = diff_diff
+
+    # Remove 'results' module — it shadows the common variable name that
+    # context-dependent snippets use for fit() return values.
+    ns.pop("results", None)
+
+    # Synthetic datasets that doc snippets commonly reference
+    rng = np.random.default_rng(42)
+    staggered = diff_diff.generate_staggered_data(
+        n_units=60, n_periods=10, seed=42
+    )
+    # Add alias columns that doc snippets expect
+    # Use a simple time split (not unit-specific) so basic 2x2 DID works
+    mid = staggered["period"].median()
+    staggered["post"] = (staggered["period"] >= mid).astype(int)
+    staggered["treatment"] = staggered["treated"]
+    staggered["y"] = staggered["outcome"]
+    staggered["unit_id"] = staggered["unit"]
+    staggered["x1"] = rng.normal(size=len(staggered))
+    staggered["x2"] = rng.normal(size=len(staggered))
+    staggered["x3"] = rng.normal(size=len(staggered))
+    staggered["state"] = staggered["unit_id"]
+    staggered["time"] = staggered["period"]
+    # Uppercase aliases for comparison page snippets (R naming conventions)
+    staggered["Y"] = staggered["outcome"]
+    staggered["id"] = staggered["unit"]
+    staggered["G"] = staggered["first_treat"]
+    staggered["X1"] = staggered["x1"]
+    staggered["X2"] = staggered["x2"]
+    staggered["ever_treated"] = staggered["treated"]
+    staggered["group"] = np.where(staggered["treated"] == 1, "treatment", "control")
+    staggered["exposure"] = rng.uniform(0, 1, size=len(staggered))
+    staggered["dose"] = rng.choice([0.0, 0.5, 1.0, 2.0], size=len(staggered))
+
+    ns["data"] = staggered
+    ns["balanced"] = staggered.copy()
+    ns["df"] = staggered
+
+    # numpy / pandas always handy
+    ns["np"] = np
+    ns["pd"] = pd
+
+    # Select the non-interactive Agg backend so plot calls open no window
+    try:
+        import matplotlib
+
+        matplotlib.use("Agg")
+        import matplotlib.pyplot as plt
+
+        ns["plt"] = plt
+        ns["matplotlib"] = matplotlib
+    except ImportError:
+        pass
+
+    # ------------------------------------------------------------------
+    # Mock dataset loaders — return synthetic DataFrames matching schemas
+    # so that dataset doc snippets execute without network access.
+    # ------------------------------------------------------------------
+    def _mock_load_card_krueger(**kwargs):
+        n = 40
+        return pd.DataFrame({
+            "store_id": range(n),
+            "state": ["NJ"] * (n // 2) + ["PA"] * (n // 2),
+            "chain": (["bk", "kfc", "roys", "wendys"] * 10)[:n],
+            "emp_pre": rng.normal(20, 5, n),
+            "emp_post": rng.normal(21, 5, n),
+            "wage_pre": rng.normal(4.5, 0.3, n),
+            "wage_post": rng.normal(5.0, 0.3, n),
+            "treated": [1] * (n // 2) + [0] * (n // 2),
+        })
+
+    def _mock_load_castle_doctrine(**kwargs):
+        states = [f"S{i:02d}" for i in range(10)]
+        years = list(range(2000, 2011))
+        rows = [(s, y) for s in states for y in years]
+        n = len(rows)
+        ft = [0] * 55 + [2005] * 22 + [2007] * 22 + [2009] * 11
+        return pd.DataFrame({
+            "state": [r[0] for r in rows],
+            "year": [r[1] for r in rows],
+            "first_treat": ft[:n],
+            "homicide_rate": rng.normal(5, 1, n),
+            "population": rng.integers(500000, 5000000, n),
+            "income": rng.normal(30000, 5000, n),
+            "treated": [1 if ft[i] and r[1] >= ft[i] else 0
+                        for i, r in enumerate(rows)][:n],
+            "cohort": ft[:n],
+        })
+
+    def _mock_load_divorce_laws(**kwargs):
+        states = [f"S{i:02d}" for i in range(10)]
+        years = list(range(1965, 1990))
+        rows = [(s, y) for s in states for y in years]
+        n = len(rows)
+        ft = [0] * 125 + [1970] * 50 + [1975] * 50 + [1980] * 25
+        return pd.DataFrame({
+            "state": [r[0] for r in rows],
+            "year": [r[1] for r in rows],
+            "first_treat": ft[:n],
+            "divorce_rate": rng.normal(4, 1, n),
+            "female_lfp": rng.normal(50, 5, n),
+            "suicide_rate": rng.normal(5, 2, n),
+            "treated": [1 if ft[i] and r[1] >= ft[i] else 0
+                        for i, r in enumerate(rows)][:n],
+            "cohort": ft[:n],
+        })
+
+    def _mock_load_mpdta(**kwargs):
+        counties = list(range(1, 21))
+        years = list(range(2003, 2008))
+        rows = [(c, y) for c in counties for y in years]
+        n = len(rows)
+        ft = ([0] * 25 + [2004] * 25 + [2006] * 25 + [2007] * 25)[:n]
+        return pd.DataFrame({
+            "countyreal": [r[0] for r in rows],
+            "year": [r[1] for r in rows],
+            "lpop": rng.normal(10, 1, n),
+            "lemp": rng.normal(8, 0.5, n),
+            "first_treat": ft,
+            "treat": [1 if f != 0 else 0 for f in ft],
+        })
+
+    _dataset_dispatch = {
+        "card_krueger": _mock_load_card_krueger,
+        "castle_doctrine": _mock_load_castle_doctrine,
+        "divorce_laws": _mock_load_divorce_laws,
+        "mpdta": _mock_load_mpdta,
+    }
+
+    def _mock_load_dataset(name, **kwargs):
+        if name not in _dataset_dispatch:
+            raise ValueError(f"Unknown dataset: {name}")
+        return _dataset_dispatch[name](**kwargs)
+
+    def _mock_list_datasets():
+        return {
+            "card_krueger": "Card & Krueger (1994) minimum wage dataset",
+            "castle_doctrine": "Castle Doctrine laws - staggered adoption",
+            "divorce_laws": "Unilateral divorce laws - staggered adoption",
+            "mpdta": "Minimum wage panel data - simulated CS example",
+        }
+
+    # Install a mock module so `from diff_diff.datasets import ...` gets the mocks
+    import types
+    mock_datasets_mod = types.ModuleType("diff_diff.datasets")
+    mock_datasets_mod.load_card_krueger = _mock_load_card_krueger
+    mock_datasets_mod.load_castle_doctrine = _mock_load_castle_doctrine
+    mock_datasets_mod.load_divorce_laws = _mock_load_divorce_laws
+    mock_datasets_mod.load_mpdta = _mock_load_mpdta
+    mock_datasets_mod.load_dataset = _mock_load_dataset
+    mock_datasets_mod.list_datasets = _mock_list_datasets
+    import sys
+    sys.modules["diff_diff.datasets"] = mock_datasets_mod
+    diff_diff.datasets = mock_datasets_mod
+
+    # Also put loaders directly in namespace for bare-name usage
+    ns["load_card_krueger"] = _mock_load_card_krueger
+    ns["load_castle_doctrine"] = _mock_load_castle_doctrine
+    ns["load_divorce_laws"] = _mock_load_divorce_laws
+    ns["load_mpdta"] = _mock_load_mpdta
+    ns["load_dataset"] = _mock_load_dataset
+    ns["list_datasets"] = _mock_list_datasets
+
+    return ns
+
+
+# ---------------------------------------------------------------------------
+# Test
+# ---------------------------------------------------------------------------
+@pytest.fixture(autouse=True)
+def _restore_datasets_module():
+    """Put the real diff_diff.datasets back once each test has finished."""
+    import sys as _sys
+    import diff_diff as _dd
+    key = "diff_diff.datasets"
+    saved_mod, saved_attr = _sys.modules.get(key), getattr(_dd, "datasets", None)
+    yield
+    if saved_mod is None:
+        _sys.modules.pop(key, None)
+    else:
+        _sys.modules[key] = saved_mod
+    if saved_attr is not None:
+        _dd.datasets = saved_attr
+
+
+@pytest.mark.parametrize(
+    "test_id, code, skip_reason",
+    [pytest.param(*case, id=case[0]) for case in _CASES],
+)
+def test_doc_snippet(test_id: str, code: str, skip_reason: Optional[str]):
+    """Run one documentation snippet; fail on anything but a tolerated error."""
+    if skip_reason:
+        pytest.skip(skip_reason)
+
+    # Shared tail of both failure messages: the snippet source, indented.
+    listing = f"Code:\n{textwrap.indent(code, '  ')}"
+    namespace = _build_namespace()
+    try:
+        exec(compile(code, f"<{test_id}>", "exec"), namespace)
+    except NameError:
+        # The snippet leans on a name bound by an earlier block of the same
+        # page (e.g. ``results`` from a prior fit).  Blocks run in isolation
+        # here, so this is tolerated rather than reported as an API mismatch.
+        return
+    except ImportError as exc:
+        # Tolerated only for the known optional third-party packages, either
+        # because the import itself failed or because a guard raised
+        # ImportError by hand with the package name in its message.  Failing
+        # diff_diff.* or stdlib imports still fail the test.
+        missing = getattr(exc, "name", "") or ""
+        text = str(exc).lower()
+        if missing.split(".")[0] in _THIRD_PARTY_MODULES:
+            return
+        if any(pkg in text for pkg in _THIRD_PARTY_MODULES):
+            return
+        pytest.fail(
+            f"Snippet {test_id} raised ImportError for "
+            f"'{missing}': {exc}\n\n"
+            f"{listing}"
+        )
+    except Exception as exc:
+        pytest.fail(
+            f"Snippet {test_id} raised {type(exc).__name__}: {exc}\n\n"
+            f"{listing}"
+        )