diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 4117e04..0b94285 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -5,6 +5,8 @@ version: 2 build: os: ubuntu-22.04 + apt_packages: + - pandoc tools: python: "3.11" jobs: @@ -16,7 +18,7 @@ build: # # Keep in sync with pyproject.toml [project.dependencies] # and [project.optional-dependencies.docs]. - - pip install "numpy>=1.20.0" "pandas>=1.3.0" "scipy>=1.7.0" "sphinx>=6.0" "sphinx-rtd-theme>=1.0" + - pip install "numpy>=1.20.0" "pandas>=1.3.0" "scipy>=1.7.0" "sphinx>=6.0" "pydata-sphinx-theme>=0.15" "sphinxext-opengraph>=0.9" "sphinx-sitemap>=2.5" "nbsphinx>=0.9" "matplotlib>=3.5" # Build documentation in the "docs/" directory with Sphinx sphinx: diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 0000000..de45d02 --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,27 @@ +cff-version: 1.2.0 +title: "diff-diff: Difference-in-Differences Causal Inference for Python" +message: "If you use this software, please cite it as below." +type: software +authors: + - name: "diff-diff contributors" +license: MIT +version: "2.7.1" +date-released: "2026-03-18" +url: "https://github.com/igerber/diff-diff" +repository-code: "https://github.com/igerber/diff-diff" +keywords: + - difference-in-differences + - causal-inference + - econometrics + - python + - treatment-effects + - event-study + - staggered-adoption + - parallel-trends + - synthetic-control + - panel-data +abstract: >- + A Python library for Difference-in-Differences (DiD) causal inference analysis. + Provides sklearn-like estimators for modern DiD methods including + Callaway-Sant'Anna, Synthetic DiD, Honest DiD, event studies, and parallel + trends testing. Validated against R packages (did, synthdid, fixest). 
diff --git a/README.md b/README.md index ae28d3c..834da93 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,11 @@ # diff-diff +[![PyPI version](https://img.shields.io/pypi/v/diff-diff.svg)](https://pypi.org/project/diff-diff/) +[![Python versions](https://img.shields.io/pypi/pyversions/diff-diff.svg)](https://pypi.org/project/diff-diff/) +[![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT) +[![Downloads](https://img.shields.io/pypi/dm/diff-diff.svg)](https://pypi.org/project/diff-diff/) +[![Documentation](https://readthedocs.org/projects/diff-diff/badge/?version=stable)](https://diff-diff.readthedocs.io/en/stable/) + A Python library for Difference-in-Differences (DiD) causal inference analysis with an sklearn-like API and statsmodels-style outputs. ## Installation @@ -2909,6 +2915,21 @@ The `HonestDiD` module implements sensitivity analysis methods for relaxing the - **Cunningham, S. (2021).** *Causal Inference: The Mixtape*. Yale University Press. [https://mixtape.scunning.com/](https://mixtape.scunning.com/) +## Citing diff-diff + +If you use diff-diff in your research, please cite it: + +```bibtex +@software{diff_diff, + title = {diff-diff: Difference-in-Differences Causal Inference for Python}, + author = {{diff-diff contributors}}, + url = {https://github.com/igerber/diff-diff}, + license = {MIT}, +} +``` + +See [`CITATION.cff`](CITATION.cff) for the full citation metadata. + ## License MIT License diff --git a/TODO.md b/TODO.md index fe779df..3e63771 100644 --- a/TODO.md +++ b/TODO.md @@ -63,6 +63,7 @@ Deferred items from PR reviews that were not addressed before merge. 
| R comparison tests spawn separate `Rscript` per test (slow CI) | `tests/test_methodology_twfe.py:294` | #139 | Low | | CS R helpers hard-code `xformla = ~ 1`; no covariate-adjusted R benchmark for IRLS path | `tests/test_methodology_callaway.py` | #202 | Low | | Context-dependent doc snippets pass via blanket NameError; no standalone validation | `tests/test_doc_snippets.py`, `docs/api/visualization.rst`, `docs/python_comparison.rst`, `docs/r_comparison.rst` | #206 | Low | +| ~1,460 `duplicate object description` Sphinx warnings — each class attribute is documented in both module API pages and autosummary stubs; fix by adding `:no-index:` to one location or restructuring API docs to avoid overlap | `docs/api/*.rst`, `docs/api/_autosummary/` | — | Low | --- diff --git a/docs/_static/custom.css b/docs/_static/custom.css index 0dd6505..fe5831f 100644 --- a/docs/_static/custom.css +++ b/docs/_static/custom.css @@ -26,11 +26,6 @@ table.docutils td, table.docutils th { font-weight: bold; } -/* Method/function signature styling */ -.sig-name { - font-weight: bold; -} - /* Better parameter list styling */ .field-list { margin-top: 1em; diff --git a/docs/_templates/layout.html b/docs/_templates/layout.html new file mode 100644 index 0000000..517e8b4 --- /dev/null +++ b/docs/_templates/layout.html @@ -0,0 +1,21 @@ +{% extends "pydata_sphinx_theme/layout.html" %} +{% block extrahead %} +{{ super() }} + +{% endblock %} diff --git a/docs/benchmarks.rst b/docs/benchmarks.rst index 519ff06..f11e01a 100644 --- a/docs/benchmarks.rst +++ b/docs/benchmarks.rst @@ -1,3 +1,7 @@ +.. meta:: + :description: Validation benchmarks comparing diff-diff against R packages (did, synthdid, fixest). Coefficient accuracy, standard error comparison, and performance metrics. 
+ :keywords: difference-in-differences benchmark, DiD validation R, python econometrics accuracy, did package comparison + Benchmarks: Validation Against R Packages ========================================= diff --git a/docs/choosing_estimator.rst b/docs/choosing_estimator.rst index 3670f9a..0e82bf4 100644 --- a/docs/choosing_estimator.rst +++ b/docs/choosing_estimator.rst @@ -1,3 +1,7 @@ +.. meta:: + :description: Guide to choosing the right Difference-in-Differences estimator. Covers basic DiD, TWFE, staggered adoption methods (Callaway-Sant'Anna, Sun-Abraham), Synthetic DiD, and more. + :keywords: which DiD estimator, staggered DiD estimator, difference-in-differences method selection, TWFE alternatives + Choosing an Estimator ===================== diff --git a/docs/conf.py b/docs/conf.py index 4755e57..ade3cfd 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -8,9 +8,6 @@ # Add repository root to sys.path so autodoc imports from checked-out source # without needing pip install (which would require the Rust/maturin toolchain). -# Note: visualization.py lazily imports matplotlib inside functions, so it is -# not needed as a build dependency. If a future module adds a top-level -# matplotlib import, add it to the RTD dep list in .readthedocs.yaml. 
sys.path.insert(0, os.path.abspath("..")) import diff_diff @@ -30,10 +27,13 @@ "sphinx.ext.viewcode", "sphinx.ext.intersphinx", "sphinx.ext.mathjax", + "sphinxext.opengraph", + "sphinx_sitemap", + "nbsphinx", ] templates_path = ["_templates"] -exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "llms.txt", "llms-full.txt"] # -- Options for autodoc ----------------------------------------------------- autodoc_default_options = { @@ -62,17 +62,56 @@ napoleon_attr_annotations = True # -- Options for HTML output ------------------------------------------------- -html_theme = "sphinx_rtd_theme" +html_theme = "pydata_sphinx_theme" html_static_path = ["_static"] +html_title = "diff-diff: Difference-in-Differences Causal Inference for Python" +# Use RTD's canonical URL when available; fall back to stable for local builds. +_canonical_url = os.environ.get( + "READTHEDOCS_CANONICAL_URL", + "https://diff-diff.readthedocs.io/en/stable/", +) +html_baseurl = _canonical_url +html_extra_path = ["llms.txt", "llms-full.txt"] +sitemap_url_scheme = "{link}" html_theme_options = { + "icon_links": [ + { + "name": "GitHub", + "url": "https://github.com/igerber/diff-diff", + "icon": "fa-brands fa-github", + }, + { + "name": "PyPI", + "url": "https://pypi.org/project/diff-diff/", + "icon": "fa-brands fa-python", + }, + ], "navigation_depth": 4, - "collapse_navigation": False, - "sticky_navigation": True, - "includehidden": True, - "titles_only": False, + "show_toc_level": 2, + "use_edit_page_button": True, } +html_context = { + "github_user": "igerber", + "github_repo": "diff-diff", + "github_version": "main", + "doc_path": "docs", +} + +# -- Options for sphinxext-opengraph ----------------------------------------- +ogp_site_url = _canonical_url +ogp_site_name = "diff-diff" +ogp_description_length = 200 +ogp_type = "website" +ogp_enable_meta_description = True +ogp_social_cards = { + "line_color": "#1f77b4", +} + +# -- Options 
for nbsphinx --------------------------------------------------- +nbsphinx_execute = "never" + # -- Options for intersphinx ------------------------------------------------- intersphinx_mapping = { "python": ("https://docs.python.org/3", None), @@ -83,19 +122,17 @@ # -- ReadTheDocs version-aware banner ---------------------------------------- # Shows a warning on development builds so users know they may be reading -# docs for unreleased features. Only activates on RTD (not local builds). +# docs for unreleased features. Uses PyData theme's announcement bar on RTD, +# falls back to rst_prolog for local builds. rtd_version = os.environ.get("READTHEDOCS_VERSION", "") rtd_version_type = os.environ.get("READTHEDOCS_VERSION_TYPE", "") if rtd_version == "latest" or rtd_version_type == "branch": - rst_prolog = """ -.. warning:: - - This documentation is for the **development version** of diff-diff. - It may describe features not yet available in the latest PyPI release. - For stable documentation, use the version selector (bottom-left) to switch to **stable**. - -""" + html_theme_options["announcement"] = ( + "This documentation is for the development version of diff-diff. " + "It may describe features not yet available in the latest PyPI release. " + 'Use the version selector to switch to stable.' + ) # -- Custom CSS -------------------------------------------------------------- def setup(app): diff --git a/docs/index.rst b/docs/index.rst index 236997c..6bdc8db 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,3 +1,7 @@ +.. meta:: + :description: diff-diff — Python library for Difference-in-Differences causal inference. Callaway-Sant'Anna, Synthetic DiD, Honest DiD, event studies, parallel trends. sklearn-like API, validated against R. 
+ :keywords: difference-in-differences, python, causal inference, DiD, econometrics, treatment effects, staggered adoption, event study + diff-diff: Difference-in-Differences in Python ============================================== @@ -67,6 +71,84 @@ Quick Links api/index +.. toctree:: + :maxdepth: 1 + :caption: Tutorials + :hidden: + + tutorials/01_basic_did + tutorials/02_staggered_did + tutorials/03_synthetic_did + tutorials/04_parallel_trends + tutorials/05_honest_did + tutorials/06_power_analysis + tutorials/07_pretrends_power + tutorials/08_triple_diff + tutorials/09_real_world_examples + tutorials/10_trop + tutorials/11_imputation_did + tutorials/12_two_stage_did + tutorials/13_stacked_did + tutorials/14_continuous_did + tutorials/15_efficient_did + +What is Difference-in-Differences? +---------------------------------- + +Difference-in-Differences (DiD) is a quasi-experimental research design that estimates +causal treatment effects by comparing outcome changes over time between treated and +control groups. It is one of the most widely used methods in applied economics, +public policy evaluation, and social science research. + +Why diff-diff? +-------------- + +- **Complete method coverage**: 13+ estimators from basic 2x2 DiD to cutting-edge methods like Efficient DiD (Chen et al. 2025) and TROP (Athey et al. 
2025) +- **Familiar API**: sklearn-like ``fit()`` interface — if you know scikit-learn, you know diff-diff +- **Modern staggered methods**: Callaway-Sant'Anna, Sun-Abraham, Imputation DiD, Two-Stage DiD, and Stacked DiD handle heterogeneous treatment timing correctly +- **Robust inference**: Heteroskedasticity-robust, cluster-robust, wild cluster bootstrap, and multiplier bootstrap +- **Sensitivity analysis**: Honest DiD (Rambachan & Roth 2023) for robust inference under parallel trends violations +- **Validated against R**: Benchmarked against ``did``, ``synthdid``, and ``fixest`` — see :doc:`benchmarks` +- **No heavy dependencies**: Only numpy, pandas, and scipy + +Supported Estimators +-------------------- + +.. list-table:: + :header-rows: 1 + :widths: 30 70 + + * - Estimator + - Description + * - :class:`~diff_diff.DifferenceInDifferences` + - Basic 2x2 DiD with robust/clustered standard errors + * - :class:`~diff_diff.TwoWayFixedEffects` + - Panel data with unit and time fixed effects + * - :class:`~diff_diff.MultiPeriodDiD` + - Event study with period-specific treatment effects + * - :class:`~diff_diff.CallawaySantAnna` + - Callaway & Sant'Anna (2021) for staggered adoption + * - :class:`~diff_diff.SunAbraham` + - Sun & Abraham (2021) interaction-weighted estimator + * - :class:`~diff_diff.ImputationDiD` + - Borusyak, Jaravel & Spiess (2024) imputation estimator + * - :class:`~diff_diff.TwoStageDiD` + - Gardner (2022) two-stage residualized estimator + * - :class:`~diff_diff.SyntheticDiD` + - Synthetic DiD combining DiD and synthetic control + * - :class:`~diff_diff.StackedDiD` + - Wing, Freedman & Hollingsworth (2024) stacked DiD + * - :class:`~diff_diff.EfficientDiD` + - Chen, Sant'Anna & Xie (2025) efficient DiD + * - :class:`~diff_diff.TripleDifference` + - Triple difference (DDD) estimator + * - :class:`~diff_diff.ContinuousDiD` + - Continuous treatment DiD + * - :class:`~diff_diff.TROP` + - Triply Robust Panel with factor model adjustment (Athey et al. 
2025) + * - :class:`~diff_diff.BaconDecomposition` + - Goodman-Bacon decomposition diagnostics + Indices and tables ------------------ diff --git a/docs/llms-full.txt b/docs/llms-full.txt new file mode 100644 index 0000000..8d4b7a2 --- /dev/null +++ b/docs/llms-full.txt @@ -0,0 +1,1453 @@ +# diff-diff + +> A Python library for Difference-in-Differences causal inference analysis. Provides sklearn-like estimators with statsmodels-style output for econometric analysis. + +- Version: 2.7.1 +- Repository: https://github.com/igerber/diff-diff +- License: MIT +- Dependencies: numpy, pandas, scipy (no statsmodels dependency) +- Optional: Rust backend for performance (via maturin) + +## Quick Start + +```python +import pandas as pd +from diff_diff import DifferenceInDifferences, generate_did_data + +# Generate synthetic data with known treatment effect +data = generate_did_data(n_units=200, treatment_effect=5.0, seed=42) + +# Fit basic 2x2 DiD +did = DifferenceInDifferences() +results = did.fit(data, outcome='outcome', treatment='treated', time='post') +print(results.summary()) +print(f"ATT: {results.att:.3f} (SE: {results.se:.3f})") +``` + +## Design Patterns + +- **sklearn-like API**: All estimators use `fit()` method, `get_params()`/`set_params()` for configuration. +- **Formula interface**: Supports R-style formulas like `"outcome ~ treated * post"`. +- **Results objects**: Rich dataclass containers with `summary()`, `to_dict()`, `to_dataframe()`. +- **Estimator aliases**: Short names available (e.g., `DiD`, `CS`, `SA`, `BJS`, `Gardner`, `SDiD`, `TWFE`, `DDD`, `CDiD`, `EDiD`, `Stacked`, `Bacon`). + +## Estimators + +### DifferenceInDifferences + +Basic 2x2 Difference-in-Differences estimator. 
+ +```python +DifferenceInDifferences( + robust: bool = True, # HC1 robust standard errors + cluster: str | None = None, # Column for cluster-robust SEs + alpha: float = 0.05, # Significance level + inference: str = "analytical", # "analytical" or "wild_bootstrap" + n_bootstrap: int = 999, # Bootstrap replications (if inference="wild_bootstrap") + bootstrap_weights: str = "rademacher", # "rademacher", "webb", or "mammen" + seed: int | None = None, # Random seed + rank_deficient_action: str = "warn", # "warn", "error", or "silent" +) +``` + +**Alias:** `DiD` + +**fit() parameters:** + +```python +did.fit( + data: pd.DataFrame, + outcome: str = None, # Outcome variable column + treatment: str = None, # Treatment indicator column (0/1) + time: str = None, # Post-treatment indicator column (0/1) + formula: str = None, # R-style formula (e.g., "y ~ treated * post") + covariates: list[str] = None, # Linear control variables + fixed_effects: list[str] = None, # Low-dimensional FE (dummy variables) + absorb: list[str] = None, # High-dimensional FE (within-transformation) +) -> DiDResults +``` + +**Usage:** + +```python +from diff_diff import DifferenceInDifferences + +did = DifferenceInDifferences(robust=True) +results = did.fit(data, outcome='y', treatment='treated', time='post') +results.print_summary() + +# Formula interface +results = did.fit(data, formula='y ~ treated * post') + +# With covariates and fixed effects +results = did.fit(data, outcome='y', treatment='treated', time='post', + covariates=['age', 'income'], absorb=['firm_id']) +``` + +### TwoWayFixedEffects + +Two-Way Fixed Effects estimator for panel data. Inherits from DifferenceInDifferences. 
+ +```python +TwoWayFixedEffects( + robust: bool = True, + cluster: str | None = None, # Auto-clusters at unit level if None + alpha: float = 0.05, +) +``` + +**Alias:** `TWFE` + +**fit() parameters:** + +```python +twfe.fit( + data: pd.DataFrame, + outcome: str, + treatment: str, + time: str, + unit: str, + covariates: list[str] = None, +) -> DiDResults +``` + +**Usage:** + +```python +from diff_diff import TwoWayFixedEffects + +twfe = TwoWayFixedEffects() +results = twfe.fit(data, outcome='y', treatment='treated', time='post', unit='unit_id') +results.print_summary() +``` + +**Note:** TWFE can be biased with staggered treatment timing and heterogeneous effects. Consider CallawaySantAnna, SunAbraham, or ImputationDiD for staggered designs. + +### MultiPeriodDiD + +Event-study style DiD with period-specific treatment effects. Inherits from DifferenceInDifferences. + +```python +MultiPeriodDiD( + robust: bool = True, + cluster: str | None = None, + alpha: float = 0.05, +) +``` + +**Alias:** `EventStudy` + +**fit() parameters:** + +```python +mp_did.fit( + data: pd.DataFrame, + outcome: str, + treatment: str, + time: str, + post_periods: list = None, # Which periods are post-treatment + covariates: list[str] = None, + fixed_effects: list[str] = None, + absorb: list[str] = None, + reference_period: Any = None, # Reference period (default: last pre-period) +) -> MultiPeriodDiDResults +``` + +**Usage:** + +```python +from diff_diff import MultiPeriodDiD, plot_event_study + +did = MultiPeriodDiD() +results = did.fit(data, outcome='sales', treatment='treated', + time='period', post_periods=[4, 5, 6, 7]) +results.print_summary() +plot_event_study(results) +``` + +### CallawaySantAnna + +Callaway-Sant'Anna (2021) estimator for staggered DiD with heterogeneous treatment effects. 
+ +```python +CallawaySantAnna( + control_group: str = "never_treated", # "never_treated" or "not_yet_treated" + anticipation: int = 0, # Anticipation periods + estimation_method: str = "dr", # "dr", "ipw", or "reg" + alpha: float = 0.05, + cluster: str | None = None, # Defaults to unit-level clustering + n_bootstrap: int = 0, # 0 = analytical SEs, 999+ recommended + bootstrap_weights: str | None = None, # "rademacher", "mammen", or "webb" + seed: int | None = None, + rank_deficient_action: str = "warn", + base_period: str = "varying", # "varying" or "universal" + cband: bool = True, # Simultaneous confidence bands + pscore_trim: float = 0.01, # Propensity score trimming bound +) +``` + +**Alias:** `CS` + +**fit() parameters:** + +```python +cs.fit( + data: pd.DataFrame, + outcome: str, + unit: str, + time: str, + first_treat: str, # Column: first treatment period (0 or inf for never-treated) + covariates: list[str] = None, + aggregate: str = None, # None, "simple", "event_study", "group", or "all" + balance_e: int = None, # Balance event study at this relative period +) -> CallawaySantAnnaResults +``` + +**Usage:** + +```python +from diff_diff import CallawaySantAnna, plot_event_study + +cs = CallawaySantAnna(estimation_method="dr", n_bootstrap=999, seed=42) +results = cs.fit(data, outcome='outcome', unit='unit', time='period', + first_treat='first_treat', aggregate='event_study') +results.print_summary() +plot_event_study(results) +``` + +### SunAbraham + +Sun-Abraham (2021) interaction-weighted estimator for staggered DiD. 
+ +```python +SunAbraham( + control_group: str = "never_treated", # "never_treated" or "not_yet_treated" + anticipation: int = 0, + alpha: float = 0.05, + cluster: str | None = None, # Defaults to unit-level clustering + n_bootstrap: int = 0, # 0 = analytical cluster-robust SEs + seed: int | None = None, + rank_deficient_action: str = "warn", +) +``` + +**Alias:** `SA` + +**fit() parameters:** + +```python +sa.fit( + data: pd.DataFrame, + outcome: str, + unit: str, + time: str, + first_treat: str, + covariates: list[str] = None, +) -> SunAbrahamResults +``` + +**Usage:** + +```python +from diff_diff import SunAbraham + +sa = SunAbraham() +results = sa.fit(data, outcome='outcome', unit='unit', + time='period', first_treat='first_treat') +results.print_summary() +``` + +### ImputationDiD + +Borusyak-Jaravel-Spiess (2024) imputation DiD estimator. Efficient estimator producing shorter CIs than CS/SA under homogeneous effects. + +```python +ImputationDiD( + anticipation: int = 0, + alpha: float = 0.05, + cluster: str | None = None, # Defaults to unit-level clustering + n_bootstrap: int = 0, # 0 = analytical (Theorem 3 variance) + bootstrap_weights: str = "rademacher", # "rademacher", "mammen", or "webb" + seed: int | None = None, + rank_deficient_action: str = "warn", + horizon_max: int | None = None, # Max event-study horizon + aux_partition: str = "cohort_horizon", # "cohort_horizon", "cohort", or "horizon" +) +``` + +**Alias:** `BJS` + +**fit() parameters:** + +```python +imp.fit( + data: pd.DataFrame, + outcome: str, + unit: str, + time: str, + first_treat: str, + covariates: list[str] = None, + aggregate: str = None, # None, "simple", "event_study", "group", or "all" + balance_e: int = None, +) -> ImputationDiDResults +``` + +**Usage:** + +```python +from diff_diff import ImputationDiD, plot_event_study + +est = ImputationDiD() +results = est.fit(data, outcome='outcome', unit='unit', + time='period', first_treat='first_treat', + aggregate='event_study') 
+results.print_summary() +plot_event_study(results) +``` + +### TwoStageDiD + +Gardner (2022) two-stage DiD estimator. Point estimates match ImputationDiD; uses GMM sandwich variance. + +```python +TwoStageDiD( + anticipation: int = 0, + alpha: float = 0.05, + cluster: str | None = None, + n_bootstrap: int = 0, + bootstrap_weights: str = "rademacher", + seed: int | None = None, + rank_deficient_action: str = "warn", + horizon_max: int | None = None, +) +``` + +**Alias:** `Gardner` + +**fit() parameters:** + +```python +ts.fit( + data: pd.DataFrame, + outcome: str, + unit: str, + time: str, + first_treat: str, + covariates: list[str] = None, + aggregate: str = None, # None, "simple", "event_study", "group", or "all" + balance_e: int = None, +) -> TwoStageDiDResults +``` + +**Usage:** + +```python +from diff_diff import TwoStageDiD + +est = TwoStageDiD() +results = est.fit(data, outcome='outcome', unit='unit', + time='period', first_treat='first_treat') +results.print_summary() +``` + +### SyntheticDiD + +Synthetic Difference-in-Differences (Arkhangelsky et al. 2021). Combines DiD with synthetic control by re-weighting control units. 
+ +```python +SyntheticDiD( + zeta_omega: float | None = None, # Unit weight regularization (auto-computed if None) + zeta_lambda: float | None = None, # Time weight regularization (auto-computed if None) + alpha: float = 0.05, + variance_method: str = "placebo", # "placebo" or "bootstrap" + n_bootstrap: int = 200, # Replications for variance estimation + seed: int | None = None, +) +``` + +**Alias:** `SDiD` + +**fit() parameters:** + +```python +sdid.fit( + data: pd.DataFrame, + outcome: str, + treatment: str, + unit: str, + time: str, + post_periods: list, +) -> SyntheticDiDResults +``` + +**Usage:** + +```python +from diff_diff import SyntheticDiD + +sdid = SyntheticDiD(seed=42) +results = sdid.fit(data, outcome='outcome', treatment='treated', + unit='unit', time='period', post_periods=[5, 6, 7, 8]) +results.print_summary() +weights_df = results.get_unit_weights_df() +``` + +### TripleDifference + +Triple Difference (DDD) estimator following Ortiz-Villavicencio & Sant'Anna (2025). + +```python +TripleDifference( + estimation_method: str = "dr", # "dr", "reg", or "ipw" + robust: bool = True, + cluster: str | None = None, + alpha: float = 0.05, + pscore_trim: float = 0.01, + rank_deficient_action: str = "warn", +) +``` + +**Alias:** `DDD` + +**fit() parameters:** + +```python +ddd.fit( + data: pd.DataFrame, + outcome: str, + group: str, # Treated group indicator (0/1) + partition: str, # Eligible partition indicator (0/1) + time: str, # Post-treatment indicator (0/1) + covariates: list[str] = None, +) -> TripleDifferenceResults +``` + +**Usage:** + +```python +from diff_diff import TripleDifference + +ddd = TripleDifference(estimation_method="dr") +results = ddd.fit(data, outcome='outcome', group='group', + partition='partition', time='post') +results.print_summary() +``` + +### ContinuousDiD + +Continuous Difference-in-Differences estimator (Callaway, Goodman-Bacon & Sant'Anna 2024). Estimates dose-response curves ATT(d) and ACRT(d). 
+ +```python +ContinuousDiD( + degree: int = 3, # B-spline degree (3 = cubic) + num_knots: int = 0, # Interior knots + dvals: np.ndarray | None = None, # Custom dose evaluation grid + control_group: str = "never_treated", # "never_treated" or "not_yet_treated" + anticipation: int = 0, + base_period: str = "varying", # "varying" or "universal" + alpha: float = 0.05, + n_bootstrap: int = 0, + bootstrap_weights: str = "rademacher", + seed: int | None = None, + rank_deficient_action: str = "warn", +) +``` + +**Alias:** `CDiD` + +**fit() parameters:** + +```python +cdid.fit( + data: pd.DataFrame, + outcome: str, + unit: str, + time: str, + first_treat: str, + dose: str, # Column with continuous treatment dose + aggregate: str = None, # None, "dose", "eventstudy" +) -> ContinuousDiDResults +``` + +**Usage:** + +```python +from diff_diff import ContinuousDiD + +est = ContinuousDiD(n_bootstrap=199, seed=42) +results = est.fit(data, outcome='outcome', unit='unit', time='period', + first_treat='first_treat', dose='dose', aggregate='dose') +results.print_summary() +``` + +### StackedDiD + +Stacked DiD estimator (Wing, Freedman & Hollingsworth 2024). Addresses TWFE bias with corrective Q-weights. 
+ +```python +StackedDiD( + kappa_pre: int = 1, # Pre-treatment event-time periods + kappa_post: int = 1, # Post-treatment event-time periods + weighting: str = "aggregate", # "aggregate", "population", or "sample_share" + clean_control: str = "not_yet_treated", # "not_yet_treated", "strict", or "never_treated" + cluster: str = "unit", # "unit" or "unit_subexp" + alpha: float = 0.05, + anticipation: int = 0, + rank_deficient_action: str = "warn", +) +``` + +**Alias:** `Stacked` + +**fit() parameters:** + +```python +stacked.fit( + data: pd.DataFrame, + outcome: str, + unit: str, + time: str, + first_treat: str, + aggregate: str = None, # None, "simple", or "event_study" + population: str = None, # Required when weighting="population" +) -> StackedDiDResults +``` + +**Usage:** + +```python +from diff_diff import StackedDiD, plot_event_study + +est = StackedDiD(kappa_pre=2, kappa_post=2) +results = est.fit(data, outcome='outcome', unit='unit', + time='period', first_treat='first_treat', + aggregate='event_study') +results.print_summary() +plot_event_study(results) +``` + +### EfficientDiD + +Efficient DiD estimator (Chen, Sant'Anna & Xie 2025). Achieves the semiparametric efficiency bound for ATT(g,t). Phase 1: no-covariates path only. 
+ +```python +EfficientDiD( + pt_assumption: str = "all", # "all" (overidentified) or "post" (just-identified) + alpha: float = 0.05, + cluster: str | None = None, # Not yet implemented + n_bootstrap: int = 0, # Multiplier bootstrap iterations + bootstrap_weights: str = "rademacher", # "rademacher", "mammen", or "webb" + seed: int | None = None, + anticipation: int = 0, +) +``` + +**Alias:** `EDiD` + +**fit() parameters:** + +```python +edid.fit( + data: pd.DataFrame, + outcome: str, + unit: str, + time: str, + first_treat: str, + covariates: list[str] = None, # Not yet implemented (Phase 2) + aggregate: str = None, # None, "simple", "event_study", "group", or "all" + balance_e: int = None, +) -> EfficientDiDResults +``` + +**Usage:** + +```python +from diff_diff import EfficientDiD + +edid = EfficientDiD(pt_assumption="all") +results = edid.fit(data, outcome='y', unit='id', time='t', + first_treat='first_treat', aggregate='all') +results.print_summary() +``` + +### TROP + +Triply Robust Panel estimator (Athey, Imbens, Qu & Viviano 2025). Combines nuclear norm regularization, distance-based unit weights, and time decay weights. + +```python +TROP( + method: str = "twostep", # "twostep" or "global" (or deprecated "joint") + lambda_time_grid: list[float] = None, # Time weight decay grid [0, 0.1, 0.5, 1, 2, 5] + lambda_unit_grid: list[float] = None, # Unit weight decay grid [0, 0.1, 0.5, 1, 2, 5] + lambda_nn_grid: list[float] = None, # Nuclear norm grid [0, 0.01, 0.1, 1, 10] + max_iter: int = 100, + tol: float = 1e-6, + alpha: float = 0.05, + n_bootstrap: int = 200, + seed: int | None = None, +) +``` + +**fit() parameters:** + +```python +trop.fit( + data: pd.DataFrame, + outcome: str, + treatment: str, # Absorbing-state treatment indicator (0/1). Must be 0 for all pre-treatment periods and 1 for treatment and post-treatment periods. 
+ unit: str, + time: str, +) -> TROPResults +``` + +**Usage:** + +```python +from diff_diff import TROP + +trop = TROP(method='twostep', seed=42) +results = trop.fit(data, outcome='outcome', treatment='treated', + unit='unit', time='period') +results.print_summary() +``` + +### BaconDecomposition + +Goodman-Bacon (2021) decomposition of TWFE into 2x2 DiD comparisons. + +```python +BaconDecomposition( + weights: str = "approximate", # "approximate" or "exact" +) +``` + +**Alias:** `Bacon` + +**fit() parameters:** + +```python +bacon.fit( + data: pd.DataFrame, + outcome: str, + unit: str, + time: str, + first_treat: str, +) -> BaconDecompositionResults +``` + +**Usage:** + +```python +from diff_diff import BaconDecomposition, plot_bacon + +bacon = BaconDecomposition(weights="exact") +results = bacon.fit(data, outcome='outcome', unit='unit', + time='period', first_treat='first_treat') +results.print_summary() +plot_bacon(results) +``` + +### Convenience Functions + +```python +# Functional interfaces (create estimator + call fit in one step) +from diff_diff import imputation_did, two_stage_did, triple_difference, stacked_did, trop, bacon_decompose + +results = imputation_did(data, outcome='y', unit='id', time='t', first_treat='ft') +results = two_stage_did(data, outcome='y', unit='id', time='t', first_treat='ft') +results = triple_difference(data, outcome='y', group='g', partition='p', time='t') +results = stacked_did(data, outcome='y', unit='id', time='t', first_treat='ft', + kappa_pre=2, kappa_post=2) +results = trop(data, outcome='y', treatment='d', unit='id', time='t') +results = bacon_decompose(data, outcome='y', unit='id', time='t', first_treat='ft') +``` + +## Results Objects + +### DiDResults + +Returned by `DifferenceInDifferences.fit()` and `TwoWayFixedEffects.fit()`. 
+ +| Attribute | Type | Description | +|-----------|------|-------------| +| `att` | `float` | Average Treatment effect on the Treated | +| `se` | `float` | Standard error of ATT | +| `t_stat` | `float` | T-statistic | +| `p_value` | `float` | P-value (H0: ATT = 0) | +| `conf_int` | `tuple[float, float]` | Confidence interval | +| `n_obs` | `int` | Number of observations | +| `n_treated` | `int` | Number of treated units | +| `n_control` | `int` | Number of control units | +| `alpha` | `float` | Significance level | +| `coefficients` | `dict` | All regression coefficients | +| `vcov` | `np.ndarray` | Variance-covariance matrix | +| `residuals` | `np.ndarray` | Regression residuals | +| `fitted_values` | `np.ndarray` | Fitted values | +| `r_squared` | `float` | R-squared | +| `inference_method` | `str` | "analytical" or "wild_bootstrap" | +| `n_bootstrap` | `int` | Number of bootstrap replications | +| `n_clusters` | `int` | Number of clusters | +| `bootstrap_distribution` | `np.ndarray` | Bootstrap ATT distribution | + +**Methods:** `summary(alpha=None)`, `print_summary()`, `to_dict()`, `to_dataframe()` + +**Properties:** `is_significant`, `significance_stars` + +### MultiPeriodDiDResults + +Returned by `MultiPeriodDiD.fit()`. 
+ +| Attribute | Type | Description | +|-----------|------|-------------| +| `period_effects` | `dict[Any, PeriodEffect]` | Period-specific effects (pre and post) | +| `avg_att` | `float` | Average ATT across post-periods | +| `avg_se` | `float` | SE of average ATT | +| `avg_t_stat` | `float` | T-statistic for average ATT | +| `avg_p_value` | `float` | P-value for average ATT | +| `avg_conf_int` | `tuple[float, float]` | CI for average ATT | +| `n_obs` | `int` | Number of observations | +| `n_treated` | `int` | Number of treated observations | +| `n_control` | `int` | Number of control observations | +| `pre_periods` | `list` | Pre-treatment period identifiers | +| `post_periods` | `list` | Post-treatment period identifiers | +| `reference_period` | `Any` | Reference (omitted) period | +| `r_squared` | `float` | R-squared | +| `vcov` | `np.ndarray` | Variance-covariance matrix | +| `interaction_indices` | `dict` | Period to VCV column index mapping | + +**Methods:** `summary()`, `print_summary()`, `get_effect(period)`, `to_dict()`, `to_dataframe()` + +**Properties:** `pre_period_effects`, `post_period_effects`, `is_significant`, `significance_stars` + +### PeriodEffect + +Individual period treatment effect (used in MultiPeriodDiDResults). + +| Attribute | Type | Description | +|-----------|------|-------------| +| `period` | `Any` | Time period identifier | +| `effect` | `float` | Treatment effect estimate | +| `se` | `float` | Standard error | +| `t_stat` | `float` | T-statistic | +| `p_value` | `float` | P-value | +| `conf_int` | `tuple[float, float]` | Confidence interval | + +**Properties:** `is_significant`, `significance_stars` + +### CallawaySantAnnaResults + +Returned by `CallawaySantAnna.fit()`. 
+ +| Attribute | Type | Description | +|-----------|------|-------------| +| `group_time_effects` | `dict[(g,t), GroupTimeEffect]` | ATT(g,t) for each (group, time) | +| `overall_att` | `float` | Overall ATT | +| `overall_se` | `float` | SE of overall ATT | +| `overall_t_stat` | `float` | T-statistic | +| `overall_p_value` | `float` | P-value | +| `overall_conf_int` | `tuple[float, float]` | CI for overall ATT | +| `groups` | `list` | Treatment cohorts | +| `time_periods` | `list` | All time periods | +| `n_obs` | `int` | Number of observations | +| `event_study_effects` | `dict[int, dict]` | Event study effects by relative time | +| `group_effects` | `dict` | Group-level aggregated effects | + +**Methods:** `summary()`, `print_summary()`, `to_dataframe(level="event_study"|"group_time"|"group")` + +### SunAbrahamResults + +Returned by `SunAbraham.fit()`. + +| Attribute | Type | Description | +|-----------|------|-------------| +| `event_study_effects` | `dict[int, dict]` | Effects by relative time | +| `overall_att` | `float` | Overall ATT | +| `overall_se` | `float` | SE of overall ATT | +| `overall_t_stat` | `float` | T-statistic | +| `overall_p_value` | `float` | P-value | +| `overall_conf_int` | `tuple[float, float]` | CI | +| `cohort_weights` | `dict[int, dict]` | Interaction weights per period | +| `groups` | `list` | Treatment cohorts | +| `n_obs` | `int` | Number of observations | +| `n_treated_units` | `int` | Number of ever-treated units | +| `n_control_units` | `int` | Number of never-treated units | +| `control_group` | `str` | Control group type used | +| `cohort_effects` | `dict` | Cohort-level effects | + +**Methods:** `summary()`, `print_summary()`, `to_dataframe(level="event_study"|"cohort")` + +### SyntheticDiDResults + +Returned by `SyntheticDiD.fit()`. 
+
+| Attribute | Type | Description |
+|-----------|------|-------------|
+| `att` | `float` | Average treatment effect on the treated (ATT) |
+| `se` | `float` | Standard error (bootstrap or placebo-based) |
+| `t_stat` | `float` | T-statistic |
+| `p_value` | `float` | P-value |
+| `conf_int` | `tuple[float, float]` | Confidence interval |
+| `n_obs` | `int` | Number of observations |
+| `n_treated` | `int` | Number of treated units |
+| `n_control` | `int` | Number of control units |
+| `unit_weights` | `dict` | Control unit synthetic weights |
+| `time_weights` | `dict` | Pre-treatment time weights |
+| `pre_periods` | `list` | Pre-treatment periods |
+| `post_periods` | `list` | Post-treatment periods |
+| `variance_method` | `str` | "bootstrap" or "placebo" |
+| `noise_level` | `float` | Estimated noise level |
+| `zeta_omega` | `float` | Unit weight regularization |
+| `zeta_lambda` | `float` | Time weight regularization |
+| `pre_treatment_fit` | `float` | Pre-treatment RMSE |
+
+**Methods:** `summary()`, `print_summary()`, `to_dict()`, `to_dataframe()`, `get_unit_weights_df()`, `get_time_weights_df()`
+
+### TripleDifferenceResults
+
+Returned by `TripleDifference.fit()`. 
+ +| Attribute | Type | Description | +|-----------|------|-------------| +| `att` | `float` | ATT estimate | +| `se` | `float` | Standard error | +| `t_stat` | `float` | T-statistic | +| `p_value` | `float` | P-value | +| `conf_int` | `tuple[float, float]` | Confidence interval | +| `n_obs` | `int` | Total observations | +| `n_treated_eligible` | `int` | Treated + eligible count | +| `n_treated_ineligible` | `int` | Treated + ineligible count | +| `n_control_eligible` | `int` | Control + eligible count | +| `n_control_ineligible` | `int` | Control + ineligible count | +| `estimation_method` | `str` | "dr", "reg", or "ipw" | +| `group_means` | `dict` | Cell means | +| `pscore_stats` | `dict` | Propensity score diagnostics | +| `r_squared` | `float` | R-squared (for "reg") | + +**Methods:** `summary()`, `print_summary()`, `to_dict()`, `to_dataframe()` + +### BaconDecompositionResults + +Returned by `BaconDecomposition.fit()` and `bacon_decompose()`. + +| Attribute | Type | Description | +|-----------|------|-------------| +| `twfe_estimate` | `float` | Overall TWFE coefficient | +| `comparisons` | `list[Comparison2x2]` | All 2x2 comparisons | +| `total_weight_treated_vs_never` | `float` | Weight on treated vs never-treated | +| `total_weight_earlier_vs_later` | `float` | Weight on earlier vs later | +| `total_weight_later_vs_earlier` | `float` | Weight on forbidden comparisons | +| `weighted_avg_treated_vs_never` | `float` | Avg effect from clean comparisons | +| `weighted_avg_earlier_vs_later` | `float` | Avg effect from earlier vs later | +| `weighted_avg_later_vs_earlier` | `float` | Avg effect from forbidden comparisons | +| `n_timing_groups` | `int` | Number of treatment timing groups | +| `n_never_treated` | `int` | Number of never-treated units | +| `timing_groups` | `list` | Treatment timing cohorts | +| `n_obs` | `int` | Total observations | +| `decomposition_error` | `float` | Error: TWFE minus weighted sum | + +**Methods:** `summary()`, `print_summary()`, 
`to_dataframe()` + +### Comparison2x2 + +Individual 2x2 DiD comparison (used in BaconDecompositionResults). + +| Attribute | Type | Description | +|-----------|------|-------------| +| `treated_group` | `Any` | Timing group used as treated | +| `control_group` | `Any` | Timing group used as control | +| `comparison_type` | `str` | "treated_vs_never", "earlier_vs_later", or "later_vs_earlier" | +| `estimate` | `float` | 2x2 DiD estimate | +| `weight` | `float` | Weight in TWFE average | +| `n_treated` | `int` | Number of treated observations | +| `n_control` | `int` | Number of control observations | +| `time_window` | `tuple[float, float]` | (start, end) time window | + +### Common Results Pattern for Staggered Estimators + +ImputationDiDResults, TwoStageDiDResults, StackedDiDResults, and EfficientDiDResults share a similar structure: + +| Attribute | Type | Description | +|-----------|------|-------------| +| `overall_att` | `float` | Overall ATT | +| `overall_se` | `float` | SE of overall ATT | +| `overall_t_stat` | `float` | T-statistic | +| `overall_p_value` | `float` | P-value | +| `overall_conf_int` | `tuple[float, float]` | CI | +| `event_study_effects` | `dict[int, dict]` | Event study effects (if aggregate includes event_study) | +| `group_effects` | `dict` | Group-level effects (if aggregate includes group) | +| `groups` | `list` | Treatment cohorts | +| `time_periods` | `list` | All time periods | +| `n_obs` | `int` | Number of observations | +| `n_treated_units` | `int` | Number of treated units | +| `n_control_units` | `int` | Number of control units | + +Each event study effect dict contains: `effect`, `se`, `t_stat`, `p_value`, `conf_int`, `n_obs` (or `n_groups`). 
+ +**Methods:** `summary()`, `print_summary()`, `to_dataframe()` + +### ContinuousDiDResults + +| Attribute | Type | Description | +|-----------|------|-------------| +| `dose_response_att` | `DoseResponseCurve` | Dose-response curve for ATT | +| `dose_response_acrt` | `DoseResponseCurve` | Dose-response curve for ACRT | +| `overall_att` | `float` | Overall ATT | +| `overall_att_se` | `float` | SE of overall ATT | +| `overall_att_t_stat` | `float` | T-statistic for ATT | +| `overall_att_p_value` | `float` | P-value for ATT | +| `overall_att_conf_int` | `tuple[float, float]` | CI for ATT | +| `overall_acrt` | `float` | Overall ACRT | +| `overall_acrt_se` | `float` | SE of overall ACRT | +| `overall_acrt_t_stat` | `float` | T-statistic for ACRT | +| `overall_acrt_p_value` | `float` | P-value for ACRT | +| `overall_acrt_conf_int` | `tuple[float, float]` | CI for ACRT | +| `group_time_effects` | `dict[tuple, dict]` | Group-time level effects | +| `dose_grid` | `np.ndarray` | Evaluation grid for dose-response | +| `groups` | `list` | Treatment cohorts | +| `time_periods` | `list` | All time periods | +| `n_obs` | `int` | Number of observations | +| `n_treated_units` | `int` | Treated units | +| `n_control_units` | `int` | Control units | +| `event_study_effects` | `dict[int, dict] or None` | Event study effects (if `aggregate="eventstudy"`) | + +**DoseResponseCurve** sub-dataclass: + +| Attribute | Type | Description | +|-----------|------|-------------| +| `dose_grid` | `np.ndarray` | Dose values | +| `effects` | `np.ndarray` | Estimated effects at each dose | +| `se` | `np.ndarray` | Standard errors | +| `conf_int_lower` | `np.ndarray` | Lower CI bound | +| `conf_int_upper` | `np.ndarray` | Upper CI bound | +| `target` | `str` | `"att"` or `"acrt"` | + +**Methods:** `summary()`, `print_summary()`, `to_dataframe()` + +### TROPResults + +| Attribute | Type | Description | +|-----------|------|-------------| +| `att` | `float` | ATT estimate | +| `se` | `float` | 
Bootstrap standard error | +| `t_stat` | `float` | T-statistic | +| `p_value` | `float` | P-value | +| `conf_int` | `tuple[float, float]` | CI | +| `n_obs` | `int` | Number of observations | +| `n_treated` | `int` | Number of treated units | +| `n_control` | `int` | Number of control units | +| `n_treated_obs` | `int` | Number of treated unit-time observations | +| `lambda_time` | `float` | Selected time decay parameter | +| `lambda_unit` | `float` | Selected unit decay parameter | +| `lambda_nn` | `float` | Selected nuclear norm parameter | +| `n_bootstrap` | `int` | Number of bootstrap replications | + +**Methods:** `summary()`, `print_summary()`, `to_dict()`, `to_dataframe()` + +## Diagnostics + +### Placebo Tests + +```python +from diff_diff import ( + run_placebo_test, + placebo_timing_test, + placebo_group_test, + permutation_test, + leave_one_out_test, + run_all_placebo_tests, +) + +# Unified interface +results = run_placebo_test( + data, outcome='y', treatment='treated', time='period', + test_type='fake_timing', # "fake_timing", "fake_group", "permutation", "leave_one_out" + fake_treatment_period=1, # For fake_timing + post_periods=[3, 4, 5], +) + +# Run all tests at once +all_results = run_all_placebo_tests( + data, outcome='y', treatment='treated', time='period', unit='unit_id', + pre_periods=[0, 1, 2], post_periods=[3, 4, 5], + n_permutations=500, seed=42, +) +``` + +**Individual test functions:** + +```python +# Fake timing test +placebo_timing_test(data, outcome, treatment, time, + fake_treatment_period, post_periods=None, alpha=0.05) + +# Fake group test +placebo_group_test(data, outcome, time, unit, + fake_treated_units, post_periods=None, alpha=0.05) + +# Permutation test +permutation_test(data, outcome, treatment, time, unit, + n_permutations=1000, alpha=0.05, seed=None) + +# Leave-one-out test +leave_one_out_test(data, outcome, treatment, time, unit, alpha=0.05) +``` + +All return `PlaceboTestResults` with attributes: `test_type`, 
`placebo_effect`, `se`, `t_stat`, `p_value`, `conf_int`, `n_obs`, `is_significant`. + +### Parallel Trends Testing + +```python +from diff_diff import check_parallel_trends, check_parallel_trends_robust, equivalence_test_trends + +# Simple trend comparison +result = check_parallel_trends( + data, outcome='y', time='period', treatment_group='treated', + pre_periods=[0, 1, 2], +) + +# Distributional comparison (Wasserstein distance + permutation inference) +result = check_parallel_trends_robust( + data, outcome='y', time='period', treatment_group='treated', + unit='unit_id', pre_periods=[0, 1, 2], + n_permutations=1000, seed=42, +) + +# TOST equivalence test +result = equivalence_test_trends( + data, outcome='y', time='period', treatment_group='treated', + unit='unit_id', pre_periods=[0, 1, 2], + equivalence_margin=0.5, +) +``` + +### Wild Cluster Bootstrap + +```python +from diff_diff import wild_bootstrap_se, WildBootstrapResults + +# Directly via estimator +did = DifferenceInDifferences(inference="wild_bootstrap", n_bootstrap=999, + bootstrap_weights="webb", cluster="state") +results = did.fit(data, outcome='y', treatment='treated', time='post') +``` + +## Honest DiD Sensitivity Analysis + +Rambachan & Roth (2023) robust inference allowing bounded parallel trends violations. 
+ +### Delta Restriction Classes + +```python +from diff_diff import DeltaSD, DeltaRM, DeltaSDRM + +# Smoothness: bounds on second differences +delta_sd = DeltaSD(M=0.5) + +# Relative magnitudes: post violations <= Mbar * max pre violation +delta_rm = DeltaRM(Mbar=1.0) + +# Combined restriction +delta_sdrm = DeltaSDRM(M=0.5, Mbar=1.0) +``` + +### HonestDiD Class + +```python +from diff_diff import HonestDiD + +honest = HonestDiD( + method="relative_magnitude", # "smoothness", "relative_magnitude", or "combined" + M=1.0, # Restriction parameter + alpha=0.05, + l_vec=None, # Weighting vector (None = uniform) +) + +# Fit to event study results +bounds = honest.fit(event_study_results) +print(bounds.summary()) + +# Sensitivity analysis over M grid +sensitivity = honest.sensitivity_analysis( + event_study_results, + M_grid=[0, 0.5, 1.0, 1.5, 2.0], +) +sensitivity.plot() +``` + +### Convenience Functions + +```python +from diff_diff import compute_honest_did, sensitivity_plot + +bounds = compute_honest_did(results, method="relative_magnitude", M=1.0, alpha=0.05) +sensitivity_plot(results, method="relative_magnitude", M_grid=[0, 0.5, 1, 1.5, 2]) +``` + +### HonestDiDResults + +| Attribute | Type | Description | +|-----------|------|-------------| +| `lb` | `float` | Lower bound of identified set | +| `ub` | `float` | Upper bound of identified set | +| `ci_lb` | `float` | Lower bound of robust CI | +| `ci_ub` | `float` | Upper bound of robust CI | +| `M` | `float` | Restriction parameter value | +| `method` | `str` | Restriction type | +| `original_estimate` | `float` | Original point estimate | +| `original_se` | `float` | Original SE | +| `ci_method` | `str` | "FLCI" or "C-LF" | +| `event_study_bounds` | `dict` | Per-period bounds (optional) | + +**Properties:** `is_significant` (CI excludes zero) + +## Power Analysis + +```python +from diff_diff import PowerAnalysis, compute_mde, compute_power, compute_sample_size, simulate_power + +# Class-based interface +pa = 
PowerAnalysis(alpha=0.05, power=0.80, alternative='two-sided') +mde_result = pa.mde(n_treated=50, n_control=50, sigma=1.0) +sample_result = pa.sample_size(effect_size=0.5, sigma=1.0) +power_result = pa.power(effect_size=0.5, n_treated=50, n_control=50, sigma=1.0) + +# Convenience functions +mde_result = compute_mde(n_treated=50, n_control=50, sigma=1.0) +power_result = compute_power(effect_size=0.5, n_treated=50, n_control=50, sigma=1.0) +sample_result = compute_sample_size(effect_size=0.5, sigma=1.0) + +# Simulation-based power +sim_result = simulate_power( + n_units=200, n_periods=8, treatment_period=4, + effect_sizes=[0.1, 0.5, 1.0, 2.0], + n_simulations=500, seed=42, +) +``` + +## Pre-Trends Power Analysis + +```python +from diff_diff import PreTrendsPower, compute_pretrends_power, compute_mdv + +# Class-based +ptp = PreTrendsPower() +results = ptp.compute(event_study_results, M_grid=[0, 0.5, 1.0, 2.0]) + +# Convenience functions +results = compute_pretrends_power(event_study_results, M_grid=[0, 0.5, 1.0, 2.0]) +mdv = compute_mdv(event_study_results, target_power=0.80) +``` + +## Visualization + +All plotting functions return a matplotlib `Figure` object. 
+ +### plot_event_study + +```python +from diff_diff import plot_event_study + +plot_event_study( + results, # MultiPeriodDiDResults, CS, SA, BJS, Gardner, Stacked, or DataFrame + effects=None, # Manual dict of effects (alternative to results) + se=None, # Manual dict of SEs + periods=None, + reference_period=None, + pre_periods=None, + post_periods=None, + alpha=0.05, + figsize=(10, 6), + title="Event Study", + xlabel="Period Relative to Treatment", + ylabel="Treatment Effect", + color="#2563eb", + show_zero_line=True, + show_reference_line=True, + shade_pre=True, + ax=None, + show=True, + use_cband=True, # Use simultaneous confidence bands if available +) +``` + +### plot_group_effects + +```python +from diff_diff import plot_group_effects + +plot_group_effects( + results, # CallawaySantAnnaResults + groups=None, + figsize=(10, 6), + title="Treatment Effects by Cohort", + alpha=0.05, + show=True, + ax=None, +) +``` + +### plot_sensitivity + +```python +from diff_diff import plot_sensitivity + +plot_sensitivity( + sensitivity_results, # SensitivityResults from HonestDiD + show_bounds=True, + show_ci=True, + breakdown_line=True, + figsize=(10, 6), + title="Honest DiD Sensitivity Analysis", + ax=None, + show=True, +) +``` + +### plot_honest_event_study + +```python +from diff_diff import plot_honest_event_study + +plot_honest_event_study( + honest_results, # HonestDiDResults with event_study_bounds + periods=None, + reference_period=None, + figsize=(10, 6), + title="Event Study with Honest Confidence Intervals", + ax=None, + show=True, +) +``` + +### plot_bacon + +```python +from diff_diff import plot_bacon + +plot_bacon( + results, # BaconDecompositionResults + plot_type="scatter", # "scatter" or "bar" + figsize=(10, 6), + show_weighted_avg=True, + show_twfe_line=True, + ax=None, + show=True, +) +``` + +### plot_power_curve + +```python +from diff_diff import plot_power_curve + +plot_power_curve( + results=None, # PowerResults, SimulationPowerResults, or DataFrame 
+ effect_sizes=None, + powers=None, + mde=None, + target_power=0.80, + plot_type="effect", # "effect" or "sample_size" + figsize=(10, 6), + show_mde_line=True, + show_target_line=True, + ax=None, + show=True, +) +``` + +### plot_pretrends_power + +```python +from diff_diff import plot_pretrends_power + +plot_pretrends_power( + results=None, # PreTrendsPowerResults or PreTrendsPowerCurve + M_values=None, + powers=None, + mdv=None, + target_power=0.80, + figsize=(10, 6), + ax=None, + show=True, +) +``` + +## Data Preparation Utilities + +### Data Manipulation + +```python +from diff_diff import ( + make_treatment_indicator, + make_post_indicator, + wide_to_long, + balance_panel, + validate_did_data, + summarize_did_data, + create_event_time, + aggregate_to_cohorts, + rank_control_units, +) + +# Create binary treatment indicator +df = make_treatment_indicator(data, column='group', treated_values='A', new_column='treated') +df = make_treatment_indicator(data, column='size', threshold=75, new_column='treated') + +# Create binary post indicator +df = make_post_indicator(data, time_column='year', treatment_start=2020, new_column='post') +df = make_post_indicator(data, time_column='year', post_periods=[2020, 2021]) + +# Reshape wide to long +long_df = wide_to_long(data, value_columns=['y2018', 'y2019', 'y2020'], + id_column='unit', time_name='year', value_name='outcome') + +# Balance panel (keep only units observed in all periods) +balanced_df = balance_panel(data, unit='unit', time='period') + +# Validate DiD data +validation = validate_did_data(data, outcome='y', treatment='treated', + time='period', unit='unit') + +# Summarize DiD data +summary = summarize_did_data(data, outcome='y', treatment='treated', + time='period', unit='unit') + +# Create event time column +df = create_event_time(data, time='period', first_treat='first_treat', new_column='event_time') + +# Aggregate to cohort level +cohort_df = aggregate_to_cohorts(data, outcome='y', unit='unit', time='period', + 
first_treat='first_treat') + +# Rank control units by similarity to treated +ranking = rank_control_units(data, outcome='y', unit='unit', time='period', + treatment='treated') +``` + +### Data Generation + +```python +from diff_diff import ( + generate_did_data, + generate_staggered_data, + generate_panel_data, + generate_event_study_data, + generate_factor_data, + generate_ddd_data, + generate_continuous_did_data, +) + +# Basic 2x2 DiD data +data = generate_did_data(n_units=100, n_periods=4, treatment_effect=5.0, + treatment_fraction=0.5, treatment_period=2, seed=42) + +# Staggered adoption data +data = generate_staggered_data(n_units=100, n_periods=10, + treatment_effect=2.0, dynamic_effects=True, + never_treated_frac=0.3, seed=42) + +# Panel data with optional trend violations +data = generate_panel_data(n_units=100, n_periods=8, treatment_period=4, + parallel_trends=True, seed=42) + +# Event study data +data = generate_event_study_data(n_units=300, n_pre=5, n_post=5, + treatment_effect=5.0, seed=42) + +# Factor model data (for TROP) +data = generate_factor_data(n_units=50, n_pre=10, n_post=5, + n_treated=10, n_factors=2, seed=42) + +# Triple difference data +data = generate_ddd_data(n_per_cell=100, treatment_effect=2.0, seed=42) + +# Continuous dose data +data = generate_continuous_did_data(n_units=500, n_periods=4, + att_function="linear", att_slope=2.0, seed=42) +``` + +## Real-World Datasets + +```python +from diff_diff import load_card_krueger, load_castle_doctrine, load_divorce_laws, load_mpdta +from diff_diff import load_dataset, list_datasets, clear_cache + +# List available datasets +for name, desc in list_datasets().items(): + print(f"{name}: {desc}") + +# Load by name +data = load_dataset("card_krueger") + +# Named loaders +ck = load_card_krueger() # Card & Krueger (1994) minimum wage +castle = load_castle_doctrine() # Castle Doctrine / Stand Your Ground laws +divorce = load_divorce_laws() # Unilateral divorce laws (staggered) +mpdta = load_mpdta() # 
Minimum wage panel (simulated, from R did package) + +# Force re-download +data = load_card_krueger(force_download=True) + +# Clear local cache +clear_cache() +``` + +## Linear Algebra Helpers + +```python +from diff_diff import LinearRegression, InferenceResult + +# Low-level regression helper +reg = LinearRegression( + include_intercept=True, + robust=True, + cluster_ids=cluster_array, +) +reg.fit(X, y) +inference = reg.get_inference(coef_index) # -> InferenceResult +``` + +### InferenceResult + +| Attribute | Type | Description | +|-----------|------|-------------| +| `coefficient` | `float` | Point estimate | +| `se` | `float` | Standard error | +| `t_stat` | `float` | T-statistic | +| `p_value` | `float` | P-value | +| `conf_int` | `tuple[float, float]` | Confidence interval | + +## Rust Backend + +diff-diff includes an optional Rust backend for performance-critical operations. + +```python +from diff_diff import HAS_RUST_BACKEND + +if HAS_RUST_BACKEND: + print("Rust backend available - computations will be faster") +``` + +The Rust backend accelerates: OLS solving, robust VCV computation, bootstrap weight generation, synthetic control weights, and simplex projection. It is used transparently when available. 
Force backend selection via environment variables: + +```bash +DIFF_DIFF_BACKEND=python pytest # Force pure Python +DIFF_DIFF_BACKEND=rust pytest # Force Rust (fail if unavailable) +``` + +## Choosing an Estimator + +| Scenario | Recommended Estimator | +|----------|----------------------| +| Classic 2x2 design (one treated group, one time split) | `DifferenceInDifferences` | +| Panel data with unit + time FE | `TwoWayFixedEffects` | +| Event study with multiple periods | `MultiPeriodDiD` | +| Staggered treatment timing | `CallawaySantAnna`, `ImputationDiD`, or `SunAbraham` | +| Few treated units / synthetic control | `SyntheticDiD` | +| Interactive fixed effects / factor confounding | `TROP` | +| Continuous treatment intensity | `ContinuousDiD` | +| Two-criterion treatment (group + eligibility) | `TripleDifference` | +| Diagnosing TWFE bias | `BaconDecomposition` | +| Efficiency-optimal estimation | `EfficientDiD` | +| Corrective weighting for stacked regressions | `StackedDiD` | +| Robustness to parallel trends violations | `HonestDiD` | diff --git a/docs/llms.txt b/docs/llms.txt new file mode 100644 index 0000000..d0db70a --- /dev/null +++ b/docs/llms.txt @@ -0,0 +1,71 @@ +# diff-diff + +> A Python library for Difference-in-Differences (DiD) causal inference analysis. Provides sklearn-like estimators with statsmodels-style summary output for econometric analysis. + +diff-diff offers 14 estimators covering basic 2x2 DiD, modern staggered adoption methods, advanced panel estimators, and diagnostic tools. It supports robust and cluster-robust standard errors, wild cluster bootstrap, formula and column-name interfaces, fixed effects (dummy and absorbed), and publication-ready output. The optional Rust backend accelerates compute-intensive estimators like Synthetic DiD and TROP. 
+ +- Install: `pip install diff-diff` +- License: MIT +- Dependencies: numpy, pandas, scipy (no statsmodels dependency) +- Source: https://github.com/igerber/diff-diff +- Docs: https://diff-diff.readthedocs.io/en/stable/ + +## Documentation + +- [Quickstart](https://diff-diff.readthedocs.io/en/stable/quickstart.html): Get started with basic examples — column-name and formula interfaces, covariates, fixed effects, cluster-robust SEs +- [Choosing an Estimator](https://diff-diff.readthedocs.io/en/stable/choosing_estimator.html): Decision flowchart for selecting the right estimator for your research design +- [API Reference](https://diff-diff.readthedocs.io/en/stable/api/index.html): Full API documentation for all estimators, results classes, diagnostics, and utilities +- [R Comparison](https://diff-diff.readthedocs.io/en/stable/r_comparison.html): Side-by-side comparison with R packages (did, fixest, synthdid, didimputation, did2s, stackedev) +- [Python Comparison](https://diff-diff.readthedocs.io/en/stable/python_comparison.html): Comparison with Python DiD packages +- [Benchmarks](https://diff-diff.readthedocs.io/en/stable/benchmarks.html): Validation results and performance benchmarks vs R +- [Troubleshooting](https://diff-diff.readthedocs.io/en/stable/troubleshooting.html): Common issues and solutions + +## Estimators + +- [DifferenceInDifferences](https://diff-diff.readthedocs.io/en/stable/api/estimators.html): Basic 2x2 DiD with robust/cluster-robust SEs, wild bootstrap, formula interface, and fixed effects +- [TwoWayFixedEffects](https://diff-diff.readthedocs.io/en/stable/api/estimators.html): Panel data DiD with unit and time fixed effects via within-transformation or dummies +- [MultiPeriodDiD](https://diff-diff.readthedocs.io/en/stable/api/estimators.html): Event study design with period-specific treatment effects for dynamic analysis +- [CallawaySantAnna](https://diff-diff.readthedocs.io/en/stable/api/staggered.html): Callaway & Sant'Anna (2021) group-time 
ATT estimator for staggered adoption with aggregation +- [SunAbraham](https://diff-diff.readthedocs.io/en/stable/api/staggered.html): Sun & Abraham (2021) interaction-weighted estimator for heterogeneity-robust event studies +- [ImputationDiD](https://diff-diff.readthedocs.io/en/stable/api/imputation.html): Borusyak, Jaravel & Spiess (2024) imputation estimator — most efficient under homogeneous effects +- [TwoStageDiD](https://diff-diff.readthedocs.io/en/stable/api/two_stage.html): Gardner (2022) two-stage estimator with GMM sandwich variance +- [SyntheticDiD](https://diff-diff.readthedocs.io/en/stable/api/estimators.html): Synthetic DiD combining standard DiD and synthetic control methods for few treated units +- [TripleDifference](https://diff-diff.readthedocs.io/en/stable/api/triple_diff.html): Triple difference (DDD) estimator for designs requiring two criteria for treatment eligibility +- [ContinuousDiD](https://diff-diff.readthedocs.io/en/stable/api/continuous_did.html): Callaway, Goodman-Bacon & Sant'Anna (2024) continuous treatment DiD with dose-response curves +- [StackedDiD](https://diff-diff.readthedocs.io/en/stable/api/stacked_did.html): Wing, Freedman & Hollingsworth (2024) stacked DiD with Q-weights and sub-experiments +- [EfficientDiD](https://diff-diff.readthedocs.io/en/stable/api/efficient_did.html): Chen, Sant'Anna & Xie (2025) efficient DiD with optimal weighting for tighter SEs +- [TROP](https://diff-diff.readthedocs.io/en/stable/api/trop.html): Triply Robust Panel estimator (Athey et al. 
2025) with nuclear norm factor adjustment +- [BaconDecomposition](https://diff-diff.readthedocs.io/en/stable/api/bacon.html): Goodman-Bacon (2021) decomposition for diagnosing TWFE bias in staggered settings + +## Diagnostics and Sensitivity Analysis + +- [Parallel Trends Testing](https://diff-diff.readthedocs.io/en/stable/api/diagnostics.html): Simple and Wasserstein-robust parallel trends tests, equivalence testing (TOST) +- [Placebo Tests](https://diff-diff.readthedocs.io/en/stable/api/diagnostics.html): Placebo timing, group, permutation, and leave-one-out diagnostics +- [Honest DiD](https://diff-diff.readthedocs.io/en/stable/api/honest_did.html): Rambachan & Roth (2023) sensitivity analysis — robust CI under parallel trends violations, breakdown values +- [Pre-Trends Power Analysis](https://diff-diff.readthedocs.io/en/stable/api/pretrends.html): Roth (2022) minimum detectable violation and pre-trends test power curves +- [Power Analysis](https://diff-diff.readthedocs.io/en/stable/api/power.html): Analytical and simulation-based power analysis — MDE, sample size, power curves for study design + +## Tutorials + +- [01 Basic DiD](https://diff-diff.readthedocs.io/en/stable/tutorials/01_basic_did.html): Introduction to 2x2 DiD — column-name and formula interfaces, covariates, fixed effects, TWFE, bootstrap +- [02 Staggered DiD](https://diff-diff.readthedocs.io/en/stable/tutorials/02_staggered_did.html): Handling staggered treatment adoption with Callaway-Sant'Anna, Bacon decomposition, and aggregation +- [03 Synthetic DiD](https://diff-diff.readthedocs.io/en/stable/tutorials/03_synthetic_did.html): Synthetic DiD for few treated units — unit/time weights, diagnostics, regularization tuning +- [04 Parallel Trends](https://diff-diff.readthedocs.io/en/stable/tutorials/04_parallel_trends.html): Testing assumptions — visual inspection, robust tests, equivalence testing, placebo tests +- [05 Honest 
DiD](https://diff-diff.readthedocs.io/en/stable/tutorials/05_honest_did.html): Sensitivity analysis for parallel trends violations — relative magnitudes, smoothness, breakdown values +- [06 Power Analysis](https://diff-diff.readthedocs.io/en/stable/tutorials/06_power_analysis.html): Study design — MDE, sample size, power curves, panel data considerations, simulation-based power +- [07 Pre-Trends Power](https://diff-diff.readthedocs.io/en/stable/tutorials/07_pretrends_power.html): Roth (2022) pre-trends power — MDV, power curves, violation types, integration with Honest DiD +- [08 Triple Difference](https://diff-diff.readthedocs.io/en/stable/tutorials/08_triple_diff.html): DDD estimation — two-criteria treatment, estimation methods (regression, IPW, doubly robust), covariates +- [09 Real-World Examples](https://diff-diff.readthedocs.io/en/stable/tutorials/09_real_world_examples.html): Card & Krueger minimum wage, Castle Doctrine laws, unilateral divorce laws with built-in datasets +- [10 TROP](https://diff-diff.readthedocs.io/en/stable/tutorials/10_trop.html): Triply robust panel estimation — factor adjustment, LOOCV tuning, comparison with Synthetic DiD +- [11 Imputation DiD](https://diff-diff.readthedocs.io/en/stable/tutorials/11_imputation_did.html): Borusyak et al. 
imputation estimator — event study, pre-trend test, efficiency comparison +- [12 Two-Stage DiD](https://diff-diff.readthedocs.io/en/stable/tutorials/12_two_stage_did.html): Gardner two-stage estimator — GMM sandwich variance, per-observation treatment effects +- [13 Stacked DiD](https://diff-diff.readthedocs.io/en/stable/tutorials/13_stacked_did.html): Stacked DiD — sub-experiments, Q-weights, event windows, trimming, clean control definitions +- [14 Continuous DiD](https://diff-diff.readthedocs.io/en/stable/tutorials/14_continuous_did.html): Continuous treatment DiD — dose-response curves, ATT(d), ACRT, B-splines, event study diagnostics +- [15 Efficient DiD](https://diff-diff.readthedocs.io/en/stable/tutorials/15_efficient_did.html): Chen, Sant'Anna & Xie (2025) efficient DiD — optimal weighting, PT-All vs PT-Post, efficiency gains + +## Optional + +- [Rust Backend](https://diff-diff.readthedocs.io/en/stable/benchmarks.html): Optional Rust backend (`maturin develop --release`) for 5-50x speedups on Synthetic DiD, TROP, and other compute-intensive estimators +- [Built-in Datasets](https://diff-diff.readthedocs.io/en/stable/api/datasets.html): Real-world datasets — Card & Krueger (1994), Castle Doctrine, divorce laws, MPDTA +- [Visualization](https://diff-diff.readthedocs.io/en/stable/api/visualization.html): Event study plots, group effects, sensitivity plots, Bacon decomposition plots, power curves +- [Data Preparation](https://diff-diff.readthedocs.io/en/stable/api/prep.html): Data generation, panel balancing, wide-to-long conversion, treatment/post indicator creation diff --git a/docs/python_comparison.rst b/docs/python_comparison.rst index c65fdfa..d94250e 100644 --- a/docs/python_comparison.rst +++ b/docs/python_comparison.rst @@ -1,3 +1,7 @@ +.. meta:: + :description: Compare diff-diff with other Python DiD libraries including pyfixest, causalimpact, and linearmodels. Feature matrix, API comparison, and migration guide. 
+ :keywords: python DiD library comparison, pyfixest vs diff-diff, causalimpact alternative, python difference-in-differences packages + Comparison with Python Packages ================================ diff --git a/docs/quickstart.rst b/docs/quickstart.rst index 7d4f927..83cfe01 100644 --- a/docs/quickstart.rst +++ b/docs/quickstart.rst @@ -1,3 +1,7 @@ +.. meta:: + :description: Get started with diff-diff for Difference-in-Differences analysis in Python. Step-by-step tutorial covering basic DiD, formulas, covariates, and robust inference. + :keywords: difference-in-differences tutorial, DiD python getting started, causal inference quickstart + Getting Started =============== diff --git a/docs/r_comparison.rst b/docs/r_comparison.rst index 8096e48..96e61e1 100644 --- a/docs/r_comparison.rst +++ b/docs/r_comparison.rst @@ -1,3 +1,7 @@ +.. meta:: + :description: Compare diff-diff with R packages for DiD analysis. Migration guide from R did, fixest, synthdid, and HonestDiD to Python with side-by-side code examples. + :keywords: R did package python alternative, fixest python, synthdid python, R to python DiD, econometrics R vs python + Comparison with R Packages ========================== diff --git a/docs/troubleshooting.rst b/docs/troubleshooting.rst index 3fa925a..d1a7770 100644 --- a/docs/troubleshooting.rst +++ b/docs/troubleshooting.rst @@ -1,3 +1,7 @@ +.. meta:: + :description: Troubleshooting guide for diff-diff. Solutions for common DiD issues including singular matrices, collinear covariates, insufficient variation, and convergence problems. 
+ :keywords: difference-in-differences troubleshooting, DiD singular matrix, collinear covariates fix, parallel trends test fails + Troubleshooting =============== diff --git a/pyproject.toml b/pyproject.toml index b040d43..5dcc7e9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "maturin" [project] name = "diff-diff" version = "2.7.1" -description = "A library for Difference-in-Differences causal inference analysis" +description = "Difference-in-Differences causal inference with sklearn-like API. Callaway-Sant'Anna, Synthetic DiD, Honest DiD, event studies, parallel trends." readme = "README.md" license = "MIT" requires-python = ">=3.9,<3.14" @@ -18,6 +18,16 @@ keywords = [ "econometrics", "statistics", "treatment-effects", + "event-study", + "staggered-adoption", + "parallel-trends", + "synthetic-control", + "panel-data", + "did", + "twfe", + "callaway-santanna", + "honest-did", + "sensitivity-analysis", ] classifiers = [ "Development Status :: 5 - Production/Stable", @@ -30,6 +40,8 @@ classifiers = [ "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", "Topic :: Scientific/Engineering :: Mathematics", + "Topic :: Scientific/Engineering :: Information Analysis", + "Topic :: Scientific/Engineering", ] dependencies = [ "numpy>=1.20.0", @@ -50,7 +62,11 @@ dev = [ ] docs = [ "sphinx>=6.0", - "sphinx-rtd-theme>=1.0", + "pydata-sphinx-theme>=0.15", + "sphinxext-opengraph>=0.9", + "sphinx-sitemap>=2.5", + "nbsphinx>=0.9", + "matplotlib>=3.5", ] [project.urls]