From af51b6b8cce2ad9c8e5351274d03c6518cf2bb89 Mon Sep 17 00:00:00 2001 From: Anthony Volk Date: Tue, 17 Mar 2026 19:47:18 +0100 Subject: [PATCH 1/5] feat: Fix economic_impact_analysis with correct programs, budget summary, and poverty demographics Fixes #258 - Add BudgetSummaryItem output and compute_budget_summary() for per-variable budget analysis - Add CountryConfig strategy pattern (US_CONFIG, UK_CONFIG) replacing country conditionals - Add compute_decile_impacts() accepting already-run simulations - Add compute_program_statistics() shared function for both US and UK - Add PolicyReformAnalysis unified result container - Fix US program names and entities to match API (employee_payroll_tax, ssi on spm_unit) - Add intra-decile impacts, poverty by demographics (age/gender/race) to analysis - Fix StopIteration crashes in Aggregate/ChangeAggregate with clear ValueError messages - Add model_rebuild() for ProgramStatistics/ProgrammeStatistics and BudgetSummaryItem - Update outputs __init__.py exports Co-Authored-By: Claude Opus 4.6 --- src/policyengine/outputs/__init__.py | 16 ++ src/policyengine/outputs/aggregate.py | 24 ++- src/policyengine/outputs/budget_summary.py | 94 ++++++++++ src/policyengine/outputs/change_aggregate.py | 24 ++- src/policyengine/outputs/country_config.py | 84 +++++++++ src/policyengine/outputs/decile_impact.py | 57 ++++++ .../outputs/policy_reform_analysis.py | 60 ++++++ .../outputs/program_statistics.py | 108 +++++++++++ .../tax_benefit_models/uk/__init__.py | 8 + .../tax_benefit_models/uk/analysis.py | 166 +++++++---------- .../tax_benefit_models/us/__init__.py | 8 + .../tax_benefit_models/us/analysis.py | 174 ++++++++---------- 12 files changed, 616 insertions(+), 207 deletions(-) create mode 100644 src/policyengine/outputs/budget_summary.py create mode 100644 src/policyengine/outputs/country_config.py create mode 100644 src/policyengine/outputs/policy_reform_analysis.py create mode 100644 src/policyengine/outputs/program_statistics.py diff --git a/src/policyengine/outputs/__init__.py b/src/policyengine/outputs/__init__.py index d426f743..f805310d 100644 --- a/src/policyengine/outputs/__init__.py +++ b/src/policyengine/outputs/__init__.py @@ -1,5 +1,9 @@ from policyengine.core import Output, OutputCollection from policyengine.outputs.aggregate import Aggregate, AggregateType +from policyengine.outputs.budget_summary import ( + BudgetSummaryItem, + compute_budget_summary, +) from policyengine.outputs.change_aggregate import ( ChangeAggregate, ChangeAggregateType, @@ -12,9 +16,11 @@ ConstituencyImpact, compute_uk_constituency_impacts, ) +from policyengine.outputs.country_config import UK_CONFIG, US_CONFIG, CountryConfig from policyengine.outputs.decile_impact import ( DecileImpact, calculate_decile_impacts, + compute_decile_impacts, ) from policyengine.outputs.inequality import ( UK_INEQUALITY_INCOME_VARIABLE, @@ -31,6 +37,7 @@ LocalAuthorityImpact, compute_uk_local_authority_impacts, ) +from policyengine.outputs.policy_reform_analysis import PolicyReformAnalysis from policyengine.outputs.poverty import ( AGE_GROUPS, GENDER_GROUPS, @@ -48,6 +55,7 @@ calculate_us_poverty_by_race, calculate_us_poverty_rates, ) +from policyengine.outputs.program_statistics import compute_program_statistics __all__ = [ "Output", @@ -86,4 +94,12 @@ "compute_uk_constituency_impacts", "LocalAuthorityImpact", "compute_uk_local_authority_impacts", + "BudgetSummaryItem", + "compute_budget_summary", + "compute_decile_impacts", + "compute_program_statistics", + "PolicyReformAnalysis", + "CountryConfig", + "US_CONFIG", + "UK_CONFIG", ] diff --git a/src/policyengine/outputs/aggregate.py b/src/policyengine/outputs/aggregate.py index 9406a4d7..09189114 100644 --- a/src/policyengine/outputs/aggregate.py +++ b/src/policyengine/outputs/aggregate.py @@ -47,10 +47,15 @@ def run(self): # Get variable object var_obj = next( - v - for v in self.simulation.tax_benefit_model_version.variables - if v.name == self.variable + ( + v + for v in self.simulation.tax_benefit_model_version.variables + if v.name == self.variable + ), + None, ) + if var_obj is None: + raise ValueError(f"Variable '{self.variable}' not found in model") # Get the target entity data target_entity = self.entity or var_obj.entity @@ -68,10 +73,17 @@ def run(self): # Apply filters if self.filter_variable is not None: filter_var_obj = next( - v - for v in self.simulation.tax_benefit_model_version.variables - if v.name == self.filter_variable + ( + v + for v in self.simulation.tax_benefit_model_version.variables + if v.name == self.filter_variable + ), + None, ) + if filter_var_obj is None: + raise ValueError( + f"Filter variable '{self.filter_variable}' not found in model" + ) if filter_var_obj.entity != target_entity: filter_mapped = self.simulation.output_dataset.data.map_to_entity( diff --git a/src/policyengine/outputs/budget_summary.py b/src/policyengine/outputs/budget_summary.py new file mode 100644 index 00000000..79d1655e --- /dev/null +++ b/src/policyengine/outputs/budget_summary.py @@ -0,0 +1,94 @@ +"""Budget summary output — totals for key budget variables under baseline and reform.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +import pandas as pd +from pydantic import ConfigDict + +from policyengine.core import Output, OutputCollection +from policyengine.outputs.aggregate import Aggregate, AggregateType + +if TYPE_CHECKING: + from policyengine.core.simulation import Simulation + + +class BudgetSummaryItem(Output): + """One row of the budget summary — totals for a single variable.""" + + model_config = ConfigDict(arbitrary_types_allowed=True) + + baseline_simulation: Simulation + reform_simulation: Simulation + variable_name: str + entity: str + + # Results populated by run() + baseline_total: float | None = None + reform_total: float | None = None + change: float | None = None + + def run(self): + baseline_agg = Aggregate( + simulation=self.baseline_simulation, + variable=self.variable_name, + aggregate_type=AggregateType.SUM, + entity=self.entity, + ) + baseline_agg.run() + + reform_agg = Aggregate( + simulation=self.reform_simulation, + variable=self.variable_name, + aggregate_type=AggregateType.SUM, + entity=self.entity, + ) + reform_agg.run() + + self.baseline_total = float(baseline_agg.result) + self.reform_total = float(reform_agg.result) + self.change = self.reform_total - self.baseline_total + + +def compute_budget_summary( + baseline_simulation: Simulation, + reform_simulation: Simulation, + variables: dict[str, str], +) -> OutputCollection[BudgetSummaryItem]: + """Compute budget totals for each variable under baseline and reform. + + Args: + baseline_simulation: Already-run baseline simulation. + reform_simulation: Already-run reform simulation. + variables: Mapping of variable name to entity, + e.g. ``{"household_tax": "household"}``. + + Returns: + OutputCollection of BudgetSummaryItem objects with a DataFrame. + """ + results: list[BudgetSummaryItem] = [] + for var_name, entity in variables.items(): + item = BudgetSummaryItem( + baseline_simulation=baseline_simulation, + reform_simulation=reform_simulation, + variable_name=var_name, + entity=entity, + ) + item.run() + results.append(item) + + df = pd.DataFrame( + [ + { + "variable_name": r.variable_name, + "entity": r.entity, + "baseline_total": r.baseline_total, + "reform_total": r.reform_total, + "change": r.change, + } + for r in results + ] + ) + + return OutputCollection(outputs=results, dataframe=df) diff --git a/src/policyengine/outputs/change_aggregate.py b/src/policyengine/outputs/change_aggregate.py index e1cd3985..45b685c0 100644 --- a/src/policyengine/outputs/change_aggregate.py +++ b/src/policyengine/outputs/change_aggregate.py @@ -59,10 +59,15 @@ def run(self): # Get variable object var_obj = next( - v - for v in self.baseline_simulation.tax_benefit_model_version.variables - if v.name == self.variable + ( + v + for v in self.baseline_simulation.tax_benefit_model_version.variables + if v.name == self.variable + ), + None, ) + if var_obj is None: + raise ValueError(f"Variable '{self.variable}' not found in model") # Get the target entity data target_entity = self.entity or var_obj.entity @@ -123,10 +128,17 @@ def run(self): # Apply filter_variable filters if self.filter_variable is not None: filter_var_obj = next( - v - for v in self.baseline_simulation.tax_benefit_model_version.variables - if v.name == self.filter_variable + ( + v + for v in self.baseline_simulation.tax_benefit_model_version.variables + if v.name == self.filter_variable + ), + None, ) + if filter_var_obj is None: + raise ValueError( + f"Filter variable '{self.filter_variable}' not found in model" + ) if filter_var_obj.entity != target_entity: filter_mapped = ( diff --git a/src/policyengine/outputs/country_config.py b/src/policyengine/outputs/country_config.py new file mode 100644 index 00000000..f8f0c660 --- /dev/null +++ b/src/policyengine/outputs/country_config.py @@ -0,0 +1,84 @@ +"""Country configuration strategy — holds all country-specific parameters.""" + +from __future__ import annotations + +from dataclasses import dataclass, field + + +@dataclass(frozen=True) +class CountryConfig: + """All country-specific parameters needed by compute functions. + + Individual compute functions read the fields they need from this + config rather than accepting a ``country_id`` string and branching. + """ + + country_id: str + income_variable: str + programs: dict[str, dict] = field(default_factory=dict) + budget_variables: dict[str, str] = field(default_factory=dict) + poverty_variables: dict[str, str] = field(default_factory=dict) + poverty_entity: str = "person" + poverty_breakdowns: tuple[str, ...] = () + inequality_income_variable: str | None = None + inequality_entity: str = "household" + + +US_CONFIG = CountryConfig( + country_id="us", + income_variable="household_net_income", + programs={ + "income_tax": {"entity": "tax_unit", "is_tax": True}, + "employee_payroll_tax": {"entity": "person", "is_tax": True}, + "snap": {"entity": "spm_unit", "is_tax": False}, + "tanf": {"entity": "spm_unit", "is_tax": False}, + "ssi": {"entity": "spm_unit", "is_tax": False}, + "social_security": {"entity": "person", "is_tax": False}, + }, + budget_variables={ + "household_tax": "household", + "household_benefits": "household", + "household_net_income": "household", + "household_state_income_tax": "tax_unit", + }, + poverty_variables={ + "spm": "spm_unit_is_in_spm_poverty", + "spm_deep": "spm_unit_is_in_deep_spm_poverty", + }, + poverty_entity="person", + poverty_breakdowns=("age", "gender", "race"), + inequality_income_variable="household_net_income", + inequality_entity="household", +) + +UK_CONFIG = CountryConfig( + country_id="uk", + income_variable="equiv_hbai_household_net_income", + programs={ + "income_tax": {"entity": "person", "is_tax": True}, + "national_insurance": {"entity": "person", "is_tax": True}, + "vat": {"entity": "household", "is_tax": True}, + "council_tax": {"entity": "household", "is_tax": True}, + "universal_credit": {"entity": "person", "is_tax": False}, + "child_benefit": {"entity": "person", "is_tax": False}, + "pension_credit": {"entity": "person", "is_tax": False}, + "income_support": {"entity": "person", "is_tax": False}, + "working_tax_credit": {"entity": "person", "is_tax": False}, + "child_tax_credit": {"entity": "person", "is_tax": False}, + }, + budget_variables={ + "household_tax": "household", + "household_benefits": "household", + "household_net_income": "household", + }, + poverty_variables={ + "absolute_bhc": "in_poverty_bhc", + "absolute_ahc": "in_poverty_ahc", + "relative_bhc": "in_relative_poverty_bhc", + "relative_ahc": "in_relative_poverty_ahc", + }, + poverty_entity="person", + poverty_breakdowns=("age", "gender"), + inequality_income_variable="equiv_hbai_household_net_income", + inequality_entity="household", +) diff --git a/src/policyengine/outputs/decile_impact.py b/src/policyengine/outputs/decile_impact.py index 9d5e2e43..bb7b9c61 100644 --- a/src/policyengine/outputs/decile_impact.py +++ b/src/policyengine/outputs/decile_impact.py @@ -96,6 +96,63 @@ def run(self): self.count_no_change = float((absolute_change[mask] == 0).sum()) +def compute_decile_impacts( + baseline_simulation: Simulation, + reform_simulation: Simulation, + income_variable: str = "equiv_hbai_household_net_income", + entity: str | None = None, + quantiles: int = 10, +) -> OutputCollection[DecileImpact]: + """Calculate decile-by-decile impact using already-run simulations. + + Unlike ``calculate_decile_impacts`` this does **not** create new + Simulation objects — it works directly with the provided ones. + + Args: + baseline_simulation: Already-run baseline simulation. + reform_simulation: Already-run reform simulation. + income_variable: Variable to measure income changes. + entity: Entity to aggregate on (default: variable's entity). + quantiles: Number of quantiles (default 10 for deciles). + + Returns: + OutputCollection of DecileImpact objects with a DataFrame. + """ + results = [] + for decile in range(1, quantiles + 1): + impact = DecileImpact( + baseline_simulation=baseline_simulation, + reform_simulation=reform_simulation, + income_variable=income_variable, + entity=entity, + decile=decile, + quantiles=quantiles, + ) + impact.run() + results.append(impact) + + df = pd.DataFrame( + [ + { + "baseline_simulation_id": r.baseline_simulation.id, + "reform_simulation_id": r.reform_simulation.id, + "income_variable": r.income_variable, + "decile": r.decile, + "baseline_mean": r.baseline_mean, + "reform_mean": r.reform_mean, + "absolute_change": r.absolute_change, + "relative_change": r.relative_change, + "count_better_off": r.count_better_off, + "count_worse_off": r.count_worse_off, + "count_no_change": r.count_no_change, + } + for r in results + ] + ) + + return OutputCollection(outputs=results, dataframe=df) + + def calculate_decile_impacts( dataset: Dataset, tax_benefit_model_version: TaxBenefitModelVersion, diff --git a/src/policyengine/outputs/policy_reform_analysis.py b/src/policyengine/outputs/policy_reform_analysis.py new file mode 100644 index 00000000..710bab89 --- /dev/null +++ b/src/policyengine/outputs/policy_reform_analysis.py @@ -0,0 +1,60 @@ +"""Unified result container for a complete policy reform analysis.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from pydantic import BaseModel, ConfigDict + +from policyengine.core import OutputCollection +from policyengine.outputs.decile_impact import DecileImpact +from policyengine.outputs.inequality import Inequality +from policyengine.outputs.intra_decile_impact import IntraDecileImpact +from policyengine.outputs.poverty import Poverty + +if TYPE_CHECKING: + from policyengine.outputs.budget_summary import BudgetSummaryItem + + +class PolicyReformAnalysis(BaseModel): + """Complete result of an economic impact analysis. + + This is a pure result container — it does no computation itself. + ``economic_impact_analysis()`` (in each country's ``analysis.py``) + builds and returns an instance of this class. + + Geographic outputs (constituency, local authority, congressional + district) and wealth deciles are **not** included here because + they depend on external data or optional dataset variables and + must be able to fail independently of the core analysis. + """ + + model_config = ConfigDict(arbitrary_types_allowed=True) + + # Distributional + decile_impacts: OutputCollection[DecileImpact] + intra_decile_impacts: OutputCollection[IntraDecileImpact] + + # Budget + budget_summary: OutputCollection[BudgetSummaryItem] + household_count_baseline: float + household_count_reform: float + + # Programs + program_statistics: ( + OutputCollection # US ProgramStatistics or UK ProgrammeStatistics + ) + + # Poverty — overall always present, demographics optional + baseline_poverty: OutputCollection[Poverty] + reform_poverty: OutputCollection[Poverty] + baseline_poverty_by_age: OutputCollection[Poverty] | None = None + reform_poverty_by_age: OutputCollection[Poverty] | None = None + baseline_poverty_by_gender: OutputCollection[Poverty] | None = None + reform_poverty_by_gender: OutputCollection[Poverty] | None = None + baseline_poverty_by_race: OutputCollection[Poverty] | None = None + reform_poverty_by_race: OutputCollection[Poverty] | None = None + + # Inequality + baseline_inequality: Inequality + reform_inequality: Inequality diff --git a/src/policyengine/outputs/program_statistics.py b/src/policyengine/outputs/program_statistics.py new file mode 100644 index 00000000..cfd53d53 --- /dev/null +++ b/src/policyengine/outputs/program_statistics.py @@ -0,0 +1,108 @@ +"""Shared compute function for program/programme statistics.""" + +from __future__ import annotations + +import logging +from typing import TYPE_CHECKING + +import pandas as pd + +from policyengine.core import OutputCollection + +if TYPE_CHECKING: + from policyengine.core.simulation import Simulation + +logger = logging.getLogger(__name__) + + +def compute_program_statistics( + baseline_simulation: Simulation, + reform_simulation: Simulation, + programs: dict[str, dict], +) -> OutputCollection: + """Compute per-program statistics for a policy reform. + + Args: + baseline_simulation: Already-run baseline simulation. + reform_simulation: Already-run reform simulation. + programs: Mapping of program name to config dict with keys + ``"entity"`` (str) and ``"is_tax"`` (bool). + Example:: + + { + "income_tax": {"entity": "tax_unit", "is_tax": True}, + "snap": {"entity": "spm_unit", "is_tax": False}, + } + + Returns: + OutputCollection of ProgramStatistics/ProgrammeStatistics objects. + Programs that raise KeyError or ValueError are silently skipped. + """ + # Import both variants — only one will actually be used depending on + # which country package is installed, but we try both so this function + # works for either. + ProgramStats: type | None = None + try: + from policyengine.tax_benefit_models.us.outputs import ProgramStatistics + + ProgramStats = ProgramStatistics + except ImportError: + pass + if ProgramStats is None: + try: + from policyengine.tax_benefit_models.uk.outputs import ProgrammeStatistics + + ProgramStats = ProgrammeStatistics + except ImportError: + pass + if ProgramStats is None: + raise ImportError( + "Neither ProgramStatistics (US) nor ProgrammeStatistics (UK) could be imported" + ) + + # Determine the field name for the program name attribute + # US uses "program_name", UK uses "programme_name" + if hasattr(ProgramStats, "model_fields"): + name_field = ( + "program_name" + if "program_name" in ProgramStats.model_fields + else "programme_name" + ) + else: + name_field = "program_name" + + results = [] + for prog_name, prog_info in programs.items(): + try: + stats = ProgramStats( + baseline_simulation=baseline_simulation, + reform_simulation=reform_simulation, + **{name_field: prog_name}, + entity=prog_info["entity"], + is_tax=prog_info.get("is_tax", False), + ) + stats.run() + results.append(stats) + except (KeyError, ValueError): + logger.warning("Skipping program %s: variable not found", prog_name) + continue + + df = pd.DataFrame( + [ + { + "program_name": getattr(r, name_field), + "entity": r.entity, + "is_tax": r.is_tax, + "baseline_total": r.baseline_total, + "reform_total": r.reform_total, + "change": r.change, + "baseline_count": r.baseline_count, + "reform_count": r.reform_count, + "winners": r.winners, + "losers": r.losers, + } + for r in results + ] + ) + + return OutputCollection(outputs=results, dataframe=df) diff --git a/src/policyengine/tax_benefit_models/uk/__init__.py b/src/policyengine/tax_benefit_models/uk/__init__.py index 09e697b7..4f7cb3c7 100644 --- a/src/policyengine/tax_benefit_models/uk/__init__.py +++ b/src/policyengine/tax_benefit_models/uk/__init__.py @@ -4,6 +4,10 @@ if find_spec("policyengine_uk") is not None: from policyengine.core import Dataset + from policyengine.core.simulation import Simulation + from policyengine.outputs.budget_summary import BudgetSummaryItem + from policyengine.outputs.country_config import UK_CONFIG + from policyengine.outputs.policy_reform_analysis import PolicyReformAnalysis from .analysis import ( UKHouseholdInput, @@ -31,6 +35,8 @@ UKYearData.model_rebuild() PolicyEngineUKDataset.model_rebuild() PolicyEngineUKLatest.model_rebuild() + ProgrammeStatistics.model_rebuild(_types_namespace={"Simulation": Simulation}) + BudgetSummaryItem.model_rebuild(_types_namespace={"Simulation": Simulation}) __all__ = [ "UKYearData", @@ -47,6 +53,8 @@ "UKHouseholdInput", "UKHouseholdOutput", "ProgrammeStatistics", + "PolicyReformAnalysis", + "UK_CONFIG", ] else: __all__ = [] diff --git a/src/policyengine/tax_benefit_models/uk/analysis.py b/src/policyengine/tax_benefit_models/uk/analysis.py index c4b32016..04096cfe 100644 --- a/src/policyengine/tax_benefit_models/uk/analysis.py +++ b/src/policyengine/tax_benefit_models/uk/analysis.py @@ -8,24 +8,19 @@ from microdf import MicroDataFrame from pydantic import BaseModel, Field, create_model -from policyengine.core import OutputCollection, Simulation +from policyengine.core import Simulation from policyengine.core.policy import Policy -from policyengine.outputs.decile_impact import ( - DecileImpact, - calculate_decile_impacts, -) -from policyengine.outputs.inequality import ( - Inequality, - calculate_uk_inequality, -) -from policyengine.outputs.poverty import ( - Poverty, - calculate_uk_poverty_rates, -) +from policyengine.outputs.budget_summary import compute_budget_summary +from policyengine.outputs.country_config import UK_CONFIG +from policyengine.outputs.decile_impact import compute_decile_impacts +from policyengine.outputs.inequality import calculate_uk_inequality +from policyengine.outputs.intra_decile_impact import compute_intra_decile_impacts +from policyengine.outputs.policy_reform_analysis import PolicyReformAnalysis +from policyengine.outputs.poverty import calculate_uk_poverty_rates +from policyengine.outputs.program_statistics import compute_program_statistics from .datasets import PolicyEngineUKDataset, UKYearData from .model import uk_latest -from .outputs import ProgrammeStatistics def _create_entity_output_model(entity: str, variables: list[str]) -> type[BaseModel]: @@ -170,115 +165,94 @@ def safe_convert(value): ) -class PolicyReformAnalysis(BaseModel): - """Complete policy reform analysis result.""" - - decile_impacts: OutputCollection[DecileImpact] - programme_statistics: OutputCollection[ProgrammeStatistics] - baseline_poverty: OutputCollection[Poverty] - reform_poverty: OutputCollection[Poverty] - baseline_inequality: Inequality - reform_inequality: Inequality - - def economic_impact_analysis( baseline_simulation: Simulation, reform_simulation: Simulation, ) -> PolicyReformAnalysis: - """Perform comprehensive analysis of a policy reform. + """Perform comprehensive economic impact analysis of a UK policy reform. + + Calls individual compute functions and assembles the results into + a single ``PolicyReformAnalysis`` object. - Returns: - PolicyReformAnalysis containing decile impacts and programme statistics + Both simulations must already be run (i.e. ``ensure()`` called). """ baseline_simulation.ensure() reform_simulation.ensure() - assert len(baseline_simulation.dataset.data.household) > 100, ( - "Baseline simulation must have more than 100 households" - ) - assert len(reform_simulation.dataset.data.household) > 100, ( - "Reform simulation must have more than 100 households" - ) + config = UK_CONFIG - # Decile impact - decile_impacts = calculate_decile_impacts( - dataset=baseline_simulation.dataset, - tax_benefit_model_version=baseline_simulation.tax_benefit_model_version, - baseline_policy=baseline_simulation.policy, - reform_policy=reform_simulation.policy, - dynamic=baseline_simulation.dynamic, + # Decile impacts + decile_impacts = compute_decile_impacts( + baseline_simulation, + reform_simulation, + income_variable=config.income_variable, ) - # Major programmes to analyse - programmes = { - # Tax - "income_tax": {"entity": "person", "is_tax": True}, - "national_insurance": {"entity": "person", "is_tax": True}, - "vat": {"entity": "household", "is_tax": True}, - "council_tax": {"entity": "household", "is_tax": True}, - # Benefits - "universal_credit": {"entity": "person", "is_tax": False}, - "child_benefit": {"entity": "person", "is_tax": False}, - "pension_credit": {"entity": "person", "is_tax": False}, - "income_support": {"entity": "person", "is_tax": False}, - "working_tax_credit": {"entity": "person", "is_tax": False}, - "child_tax_credit": {"entity": "person", "is_tax": False}, - } + # Intra-decile impacts + intra_decile_impacts = compute_intra_decile_impacts( + baseline_simulation, + reform_simulation, + income_variable=config.income_variable, + ) - programme_statistics = [] - - for programme_name, programme_info in programmes.items(): - entity = programme_info["entity"] - is_tax = programme_info["is_tax"] - - stats = ProgrammeStatistics( - baseline_simulation=baseline_simulation, - reform_simulation=reform_simulation, - programme_name=programme_name, - entity=entity, - is_tax=is_tax, - ) - stats.run() - programme_statistics.append(stats) - - # Create DataFrame - programme_df = pd.DataFrame( - [ - { - "baseline_simulation_id": p.baseline_simulation.id, - "reform_simulation_id": p.reform_simulation.id, - "programme_name": p.programme_name, - "entity": p.entity, - "is_tax": p.is_tax, - "baseline_total": p.baseline_total, - "reform_total": p.reform_total, - "change": p.change, - "baseline_count": p.baseline_count, - "reform_count": p.reform_count, - "winners": p.winners, - "losers": p.losers, - } - for p in programme_statistics - ] + # Budget summary + budget = compute_budget_summary( + baseline_simulation, + reform_simulation, + config.budget_variables, ) - programme_collection = OutputCollection( - outputs=programme_statistics, dataframe=programme_df + # Household counts — raw weight sums to avoid MicroSeries double-weighting + import numpy as np + + hh_weight_baseline = baseline_simulation.output_dataset.data.household[ + "household_weight" + ] + hh_weight_reform = reform_simulation.output_dataset.data.household[ + "household_weight" + ] + household_count_baseline = float(np.array(hh_weight_baseline).sum()) + household_count_reform = float(np.array(hh_weight_reform).sum()) + + # Programme statistics + programmes = compute_program_statistics( + baseline_simulation, + reform_simulation, + config.programs, ) - # Calculate poverty rates for both simulations + # Poverty — overall baseline_poverty = calculate_uk_poverty_rates(baseline_simulation) reform_poverty = calculate_uk_poverty_rates(reform_simulation) - # Calculate inequality for both simulations + # Poverty by demographics + from policyengine.outputs.poverty import ( + calculate_uk_poverty_by_age, + calculate_uk_poverty_by_gender, + ) + + baseline_poverty_by_age = calculate_uk_poverty_by_age(baseline_simulation) + reform_poverty_by_age = calculate_uk_poverty_by_age(reform_simulation) + baseline_poverty_by_gender = calculate_uk_poverty_by_gender(baseline_simulation) + reform_poverty_by_gender = calculate_uk_poverty_by_gender(reform_simulation) + + # Inequality baseline_inequality = calculate_uk_inequality(baseline_simulation) reform_inequality = calculate_uk_inequality(reform_simulation) return PolicyReformAnalysis( decile_impacts=decile_impacts, - programme_statistics=programme_collection, + intra_decile_impacts=intra_decile_impacts, + budget_summary=budget, + household_count_baseline=household_count_baseline, + household_count_reform=household_count_reform, + program_statistics=programmes, baseline_poverty=baseline_poverty, reform_poverty=reform_poverty, + baseline_poverty_by_age=baseline_poverty_by_age, + reform_poverty_by_age=reform_poverty_by_age, + baseline_poverty_by_gender=baseline_poverty_by_gender, + reform_poverty_by_gender=reform_poverty_by_gender, baseline_inequality=baseline_inequality, reform_inequality=reform_inequality, ) diff --git a/src/policyengine/tax_benefit_models/us/__init__.py b/src/policyengine/tax_benefit_models/us/__init__.py index b5a95b3f..60263f7c 100644 --- a/src/policyengine/tax_benefit_models/us/__init__.py +++ b/src/policyengine/tax_benefit_models/us/__init__.py @@ -4,6 +4,10 @@ if find_spec("policyengine_us") is not None: from policyengine.core import Dataset + from policyengine.core.simulation import Simulation + from policyengine.outputs.budget_summary import BudgetSummaryItem + from policyengine.outputs.country_config import US_CONFIG + from policyengine.outputs.policy_reform_analysis import PolicyReformAnalysis from .analysis import ( USHouseholdInput, @@ -31,6 +35,8 @@ USYearData.model_rebuild() PolicyEngineUSDataset.model_rebuild() PolicyEngineUSLatest.model_rebuild() + ProgramStatistics.model_rebuild(_types_namespace={"Simulation": Simulation}) + BudgetSummaryItem.model_rebuild(_types_namespace={"Simulation": Simulation}) __all__ = [ "USYearData", @@ -47,6 +53,8 @@ "USHouseholdInput", "USHouseholdOutput", "ProgramStatistics", + "PolicyReformAnalysis", + "US_CONFIG", ] else: __all__ = [] diff --git a/src/policyengine/tax_benefit_models/us/analysis.py b/src/policyengine/tax_benefit_models/us/analysis.py index 4b14a93f..e27ca471 100644 --- a/src/policyengine/tax_benefit_models/us/analysis.py +++ b/src/policyengine/tax_benefit_models/us/analysis.py @@ -8,24 +8,19 @@ from microdf import MicroDataFrame from pydantic import BaseModel, Field -from policyengine.core import OutputCollection, Simulation +from policyengine.core import Simulation from policyengine.core.policy import Policy -from policyengine.outputs.decile_impact import ( - DecileImpact, - calculate_decile_impacts, -) -from policyengine.outputs.inequality import ( - Inequality, - calculate_us_inequality, -) -from policyengine.outputs.poverty import ( - Poverty, - calculate_us_poverty_rates, -) +from policyengine.outputs.budget_summary import compute_budget_summary +from policyengine.outputs.country_config import US_CONFIG +from policyengine.outputs.decile_impact import compute_decile_impacts +from policyengine.outputs.inequality import calculate_us_inequality +from policyengine.outputs.intra_decile_impact import compute_intra_decile_impacts +from policyengine.outputs.policy_reform_analysis import PolicyReformAnalysis +from policyengine.outputs.poverty import calculate_us_poverty_rates +from policyengine.outputs.program_statistics import compute_program_statistics from .datasets import PolicyEngineUSDataset, USYearData from .model import us_latest -from .outputs import ProgramStatistics class USHouseholdOutput(BaseModel): @@ -186,118 +181,99 @@ def extract_entity_outputs( ) -class PolicyReformAnalysis(BaseModel): - """Complete policy reform analysis result.""" - - decile_impacts: OutputCollection[DecileImpact] - program_statistics: OutputCollection[ProgramStatistics] - baseline_poverty: OutputCollection[Poverty] - reform_poverty: OutputCollection[Poverty] - baseline_inequality: Inequality - reform_inequality: Inequality - - def economic_impact_analysis( baseline_simulation: Simulation, reform_simulation: Simulation, ) -> PolicyReformAnalysis: - """Perform comprehensive analysis of a policy reform. + """Perform comprehensive economic impact analysis of a US policy reform. - Returns: - PolicyReformAnalysis containing decile impacts and program statistics + Calls individual compute functions and assembles the results into + a single ``PolicyReformAnalysis`` object. + + Both simulations must already be run (i.e. ``ensure()`` called). """ baseline_simulation.ensure() reform_simulation.ensure() - assert len(baseline_simulation.dataset.data.household) > 100, ( - "Baseline simulation must have more than 100 households" - ) - assert len(reform_simulation.dataset.data.household) > 100, ( - "Reform simulation must have more than 100 households" - ) + config = US_CONFIG - # Decile impact (using household_net_income for US) - decile_impacts = calculate_decile_impacts( - dataset=baseline_simulation.dataset, - tax_benefit_model_version=baseline_simulation.tax_benefit_model_version, - baseline_policy=baseline_simulation.policy, - reform_policy=reform_simulation.policy, - dynamic=baseline_simulation.dynamic, - income_variable="household_net_income", + # Decile impacts + decile_impacts = compute_decile_impacts( + baseline_simulation, + reform_simulation, + income_variable=config.income_variable, ) - # Major programs to analyse - programs = { - # Federal taxes - "income_tax": {"entity": "tax_unit", "is_tax": True}, - "payroll_tax": {"entity": "person", "is_tax": True}, - # State and local taxes - "state_income_tax": {"entity": "tax_unit", "is_tax": True}, - # Benefits - "snap": {"entity": "spm_unit", "is_tax": False}, - "tanf": {"entity": "spm_unit", "is_tax": False}, - "ssi": {"entity": "person", "is_tax": False}, - "social_security": {"entity": "person", "is_tax": False}, - "medicare": {"entity": "person", "is_tax": False}, - "medicaid": {"entity": "person", "is_tax": False}, - "eitc": {"entity": "tax_unit", "is_tax": False}, - "ctc": {"entity": "tax_unit", "is_tax": False}, - } + # Intra-decile impacts + intra_decile_impacts = compute_intra_decile_impacts( + baseline_simulation, + reform_simulation, + income_variable=config.income_variable, + ) - program_statistics = [] - - for program_name, program_info in programs.items(): - entity = program_info["entity"] - is_tax = program_info["is_tax"] - - stats = ProgramStatistics( - baseline_simulation=baseline_simulation, - reform_simulation=reform_simulation, - program_name=program_name, - entity=entity, - is_tax=is_tax, - ) - stats.run() - program_statistics.append(stats) - - # Create DataFrame - program_df = pd.DataFrame( - [ - { - "baseline_simulation_id": p.baseline_simulation.id, - "reform_simulation_id": p.reform_simulation.id, - "program_name": p.program_name, - "entity": p.entity, - "is_tax": p.is_tax, - "baseline_total": p.baseline_total, - "reform_total": p.reform_total, - "change": p.change, - "baseline_count": p.baseline_count, - "reform_count": p.reform_count, - "winners": p.winners, - "losers": p.losers, - } - for p in program_statistics - ] + # Budget summary + budget = compute_budget_summary( + baseline_simulation, + reform_simulation, + config.budget_variables, ) - program_collection = OutputCollection( - outputs=program_statistics, dataframe=program_df + # Household counts — raw weight sums to avoid MicroSeries double-weighting + import numpy as np + + hh_weight_baseline = baseline_simulation.output_dataset.data.household[ + "household_weight" + ] + hh_weight_reform = reform_simulation.output_dataset.data.household[ + "household_weight" + ] + household_count_baseline = float(np.array(hh_weight_baseline).sum()) + household_count_reform = float(np.array(hh_weight_reform).sum()) + + # Program statistics + programs = compute_program_statistics( + baseline_simulation, + reform_simulation, + config.programs, ) - # Calculate poverty rates for both simulations + # Poverty — overall baseline_poverty = calculate_us_poverty_rates(baseline_simulation) reform_poverty = calculate_us_poverty_rates(reform_simulation) - # Calculate inequality for both simulations + # Poverty by demographics + from policyengine.outputs.poverty import ( + calculate_us_poverty_by_age, + calculate_us_poverty_by_gender, + calculate_us_poverty_by_race, + ) + + baseline_poverty_by_age = calculate_us_poverty_by_age(baseline_simulation) + reform_poverty_by_age = calculate_us_poverty_by_age(reform_simulation) + baseline_poverty_by_gender = calculate_us_poverty_by_gender(baseline_simulation) + reform_poverty_by_gender = calculate_us_poverty_by_gender(reform_simulation) + baseline_poverty_by_race = calculate_us_poverty_by_race(baseline_simulation) + reform_poverty_by_race = calculate_us_poverty_by_race(reform_simulation) + + # Inequality baseline_inequality = calculate_us_inequality(baseline_simulation) reform_inequality = calculate_us_inequality(reform_simulation) return PolicyReformAnalysis( decile_impacts=decile_impacts, - program_statistics=program_collection, + intra_decile_impacts=intra_decile_impacts, + budget_summary=budget, + household_count_baseline=household_count_baseline, + household_count_reform=household_count_reform, + program_statistics=programs, baseline_poverty=baseline_poverty, reform_poverty=reform_poverty, + baseline_poverty_by_age=baseline_poverty_by_age, + reform_poverty_by_age=reform_poverty_by_age, + baseline_poverty_by_gender=baseline_poverty_by_gender, + reform_poverty_by_gender=reform_poverty_by_gender, + baseline_poverty_by_race=baseline_poverty_by_race, + reform_poverty_by_race=reform_poverty_by_race, baseline_inequality=baseline_inequality, reform_inequality=reform_inequality, ) From bc5dcff8d2742b7518d7f4c4599d317a4643cb60 Mon Sep 17 00:00:00 2001 From: Anthony Volk Date: Wed, 18 Mar 2026 00:09:58 +0100 Subject: [PATCH 2/5] fix: Add PolicyReformAnalysis.model_rebuild() and update example script MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add model_rebuild() for PolicyReformAnalysis in both US and UK __init__.py to resolve BudgetSummaryItem forward reference (TYPE_CHECKING import) - Fix test_aggregate to expect ValueError instead of StopIteration - Fix example script bp.metric → bp.poverty_type to match Poverty class Co-Authored-By: Claude Opus 4.6 --- examples/us_budgetary_impact.py | 155 ++++++++++++++++++ .../tax_benefit_models/uk/__init__.py | 3 + .../tax_benefit_models/us/__init__.py | 3 + tests/test_aggregate.py | 4 +- 4 files changed, 163 insertions(+), 2 deletions(-) create mode 100644 examples/us_budgetary_impact.py diff --git a/examples/us_budgetary_impact.py b/examples/us_budgetary_impact.py new file mode 100644 index 00000000..f16fb051 --- /dev/null +++ b/examples/us_budgetary_impact.py @@ -0,0 +1,155 @@ +"""Example: US budgetary impact comparison between baseline and reform. + +Demonstrates the canonical policyengine.py workflow: +1. Ensure datasets exist (download + compute or load from cache) +2. Define a parametric reform +3. Run baseline and reform simulations +4. Use economic_impact_analysis() for the full analysis +5. Use ChangeAggregate for targeted single-metric queries + +Run: python examples/us_budgetary_impact.py +""" + +import datetime + +from policyengine.core import Parameter, ParameterValue, Policy, Simulation +from policyengine.outputs.change_aggregate import ( + ChangeAggregate, + ChangeAggregateType, +) +from policyengine.tax_benefit_models.us import ( + economic_impact_analysis, + ensure_datasets, + us_latest, +) + + +def main(): + year = 2026 + + # ── Step 1: Get dataset (downloads from HuggingFace on first run) ── + print("Ensuring datasets are available...") + datasets = ensure_datasets( + datasets=["hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5"], + years=[year], + data_folder="./data", + ) + dataset = datasets[f"enhanced_cps_2024_{year}"] + print(f" Loaded: {dataset}") + + # ── Step 2: Define a reform ── + # Example: double the standard deduction for single filers + param = Parameter( + name="gov.irs.deductions.standard.amount.SINGLE", + tax_benefit_model_version=us_latest, + ) + reform = Policy( + name="Double standard deduction (single)", + parameter_values=[ + ParameterValue( + parameter=param, + start_date=datetime.date(year, 1, 1), + end_date=datetime.date(year, 12, 31), + value=30_950, + ), + ], + ) + + # ── Step 3: Create simulations ── + baseline_sim = Simulation( + dataset=dataset, + tax_benefit_model_version=us_latest, + ) + reform_sim = Simulation( + dataset=dataset, + tax_benefit_model_version=us_latest, + policy=reform, + ) + + # ── Step 4a: Quick budgetary number via ChangeAggregate ── + # This requires running the simulations first. + print("\nRunning simulations...") + baseline_sim.run() + reform_sim.run() + + tax_change = ChangeAggregate( + baseline_simulation=baseline_sim, + reform_simulation=reform_sim, + variable="household_tax", + aggregate_type=ChangeAggregateType.SUM, + ) + tax_change.run() + print("\nQuick budgetary result:") + print(f" Tax revenue change: ${tax_change.result / 1e9:.2f}B") + + # Count winners and losers + winners = ChangeAggregate( + baseline_simulation=baseline_sim, + reform_simulation=reform_sim, + variable="household_net_income", + aggregate_type=ChangeAggregateType.COUNT, + change_geq=1, + ) + losers = ChangeAggregate( + baseline_simulation=baseline_sim, + reform_simulation=reform_sim, + variable="household_net_income", + aggregate_type=ChangeAggregateType.COUNT, + change_leq=-1, + ) + winners.run() + losers.run() + print(f" Winners: {winners.result / 1e6:.2f}M households") + print(f" Losers: {losers.result / 1e6:.2f}M households") + + # ── Step 4b: Full analysis via economic_impact_analysis ── + # Note: this calls .ensure() internally, which is a no-op here since + # we already ran the simulations above. If we hadn't called .run(), + # ensure() would run + cache them automatically. + print("\nRunning full economic impact analysis...") + analysis = economic_impact_analysis(baseline_sim, reform_sim) + + print("\n=== Program-by-Program Impact ===") + for prog in analysis.program_statistics.outputs: + print( + f" {prog.program_name:30s} " + f"baseline=${prog.baseline_total / 1e9:8.1f}B " + f"reform=${prog.reform_total / 1e9:8.1f}B " + f"change=${prog.change / 1e9:+8.1f}B" + ) + + print("\n=== Decile Impacts ===") + for d in analysis.decile_impacts.outputs: + print( + f" Decile {d.decile:2d}: " + f"avg change=${d.absolute_change:+8.0f} " + f"relative={d.relative_change:+.2%}" + ) + + print("\n=== Poverty ===") + for bp, rp in zip( + analysis.baseline_poverty.outputs, + analysis.reform_poverty.outputs, + strict=True, + ): + print( + f" {bp.poverty_type:30s} " + f"baseline={bp.rate:.4f} " + f"reform={rp.rate:.4f} " + f"change={rp.rate - bp.rate:+.4f}" + ) + + print("\n=== Inequality ===") + bi = analysis.baseline_inequality + ri = analysis.reform_inequality + print(f" Gini: baseline={bi.gini:.4f} reform={ri.gini:.4f}") + print( + f" Top 10% share: baseline={bi.top_10_share:.4f} reform={ri.top_10_share:.4f}" + ) + print( + f" Top 1% share: baseline={bi.top_1_share:.4f} reform={ri.top_1_share:.4f}" + ) + + +if __name__ == "__main__": + main() diff --git a/src/policyengine/tax_benefit_models/uk/__init__.py b/src/policyengine/tax_benefit_models/uk/__init__.py index 4f7cb3c7..cff18a7e 100644 --- a/src/policyengine/tax_benefit_models/uk/__init__.py +++ b/src/policyengine/tax_benefit_models/uk/__init__.py @@ -37,6 +37,9 @@ PolicyEngineUKLatest.model_rebuild() ProgrammeStatistics.model_rebuild(_types_namespace={"Simulation": Simulation}) BudgetSummaryItem.model_rebuild(_types_namespace={"Simulation": Simulation}) + PolicyReformAnalysis.model_rebuild( + _types_namespace={"BudgetSummaryItem": BudgetSummaryItem} + ) __all__ = [ "UKYearData", diff --git a/src/policyengine/tax_benefit_models/us/__init__.py b/src/policyengine/tax_benefit_models/us/__init__.py index 60263f7c..f1fc3afd 100644 --- a/src/policyengine/tax_benefit_models/us/__init__.py +++ b/src/policyengine/tax_benefit_models/us/__init__.py @@ -37,6 +37,9 @@ PolicyEngineUSLatest.model_rebuild() ProgramStatistics.model_rebuild(_types_namespace={"Simulation": Simulation}) BudgetSummaryItem.model_rebuild(_types_namespace={"Simulation": Simulation}) + PolicyReformAnalysis.model_rebuild( + _types_namespace={"BudgetSummaryItem": BudgetSummaryItem} + ) __all__ = [ "USYearData", diff --git a/tests/test_aggregate.py b/tests/test_aggregate.py index 5b4e8b27..8c0a5c37 100644 --- a/tests/test_aggregate.py +++ b/tests/test_aggregate.py @@ -478,7 +478,7 @@ def test_aggregate_invalid_variable(): variable="nonexistent_variable", aggregate_type=AggregateType.SUM, ) - with pytest.raises(StopIteration): + with pytest.raises(ValueError): agg.run() # Invalid filter variable name should raise error on run() @@ -488,5 +488,5 @@ def test_aggregate_invalid_variable(): aggregate_type=AggregateType.SUM, filter_variable="nonexistent_filter", ) - with pytest.raises(StopIteration): + with pytest.raises(ValueError): agg.run() From 3f95127910a6f4493cb8e9679225512f0b15d1eb Mon Sep 17 00:00:00 2001 From: Anthony Volk Date: Thu, 19 Mar 2026 00:20:47 +0100 Subject: [PATCH 3/5] fix: Address review findings for economic impact consolidation Fix DecileImpact.run() StopIteration bug, move inline imports to top-level, improve exception logging in compute_program_statistics, use model_construct in compute_decile_impacts, and add 11 tests for the new economic impact output modules. Co-Authored-By: Claude Opus 4.6 --- .../consolidate-economic-impact.added.md | 1 + src/policyengine/outputs/decile_impact.py | 13 +- .../outputs/program_statistics.py | 4 +- .../tax_benefit_models/us/analysis.py | 16 +- tests/test_economic_impact_outputs.py | 201 ++++++++++++++++++ 5 files changed, 220 insertions(+), 15 deletions(-) create mode 100644 changelog.d/consolidate-economic-impact.added.md create mode 100644 tests/test_economic_impact_outputs.py diff --git a/changelog.d/consolidate-economic-impact.added.md b/changelog.d/consolidate-economic-impact.added.md new file mode 100644 index 00000000..b711004d --- /dev/null +++ b/changelog.d/consolidate-economic-impact.added.md @@ -0,0 +1 @@ +Shared compute functions for economic impact analysis: CountryConfig, BudgetSummaryItem, compute_program_statistics, compute_decile_impacts, and PolicyReformAnalysis diff --git a/src/policyengine/outputs/decile_impact.py b/src/policyengine/outputs/decile_impact.py index bb7b9c61..0379ae1c 100644 --- a/src/policyengine/outputs/decile_impact.py +++ b/src/policyengine/outputs/decile_impact.py @@ -34,10 +34,15 @@ def run(self): """Calculate impact for this specific decile.""" # Get variable object to determine entity var_obj = next( - v - for v in self.baseline_simulation.tax_benefit_model_version.variables - if v.name == self.income_variable + ( + v + for v in self.baseline_simulation.tax_benefit_model_version.variables + if v.name == self.income_variable + ), + None, ) + if var_obj is None: + raise ValueError(f"Variable '{self.income_variable}' not found in model") # Get target entity target_entity = self.entity or var_obj.entity @@ -120,7 +125,7 @@ def compute_decile_impacts( """ results = [] for decile in range(1, quantiles + 1): - impact = DecileImpact( + impact = DecileImpact.model_construct( baseline_simulation=baseline_simulation, reform_simulation=reform_simulation, income_variable=income_variable, diff --git a/src/policyengine/outputs/program_statistics.py b/src/policyengine/outputs/program_statistics.py index cfd53d53..422de4b8 100644 --- a/src/policyengine/outputs/program_statistics.py +++ b/src/policyengine/outputs/program_statistics.py @@ -83,8 +83,8 @@ def compute_program_statistics( ) stats.run() results.append(stats) - except (KeyError, ValueError): - logger.warning("Skipping program %s: variable not found", prog_name) + except (KeyError, ValueError) as exc: + logger.warning("Skipping program %s: %s", prog_name, exc, exc_info=True) continue df = pd.DataFrame( diff --git a/src/policyengine/tax_benefit_models/us/analysis.py b/src/policyengine/tax_benefit_models/us/analysis.py index e27ca471..b83646f3 100644 --- a/src/policyengine/tax_benefit_models/us/analysis.py +++ b/src/policyengine/tax_benefit_models/us/analysis.py @@ -4,6 +4,7 @@ from pathlib import Path from typing import Any +import numpy as np import pandas as pd from microdf import MicroDataFrame from pydantic import BaseModel, Field @@ -16,7 +17,12 @@ from policyengine.outputs.inequality import calculate_us_inequality from policyengine.outputs.intra_decile_impact import compute_intra_decile_impacts from policyengine.outputs.policy_reform_analysis import PolicyReformAnalysis -from policyengine.outputs.poverty import calculate_us_poverty_rates +from policyengine.outputs.poverty import ( + calculate_us_poverty_by_age, + calculate_us_poverty_by_gender, + calculate_us_poverty_by_race, + calculate_us_poverty_rates, +) from policyengine.outputs.program_statistics import compute_program_statistics from .datasets import PolicyEngineUSDataset, USYearData @@ -219,8 +225,6 @@ def economic_impact_analysis( ) # Household counts — raw weight sums to avoid MicroSeries double-weighting - import numpy as np - hh_weight_baseline = baseline_simulation.output_dataset.data.household[ "household_weight" ] @@ -242,12 +246,6 @@ def economic_impact_analysis( reform_poverty = calculate_us_poverty_rates(reform_simulation) # Poverty by demographics - from policyengine.outputs.poverty import ( - calculate_us_poverty_by_age, - calculate_us_poverty_by_gender, - calculate_us_poverty_by_race, - ) - baseline_poverty_by_age = calculate_us_poverty_by_age(baseline_simulation) reform_poverty_by_age = calculate_us_poverty_by_age(reform_simulation) baseline_poverty_by_gender = calculate_us_poverty_by_gender(baseline_simulation) diff --git a/tests/test_economic_impact_outputs.py b/tests/test_economic_impact_outputs.py new file mode 100644 index 00000000..83207d72 --- /dev/null +++ b/tests/test_economic_impact_outputs.py @@ -0,0 +1,201 @@ +"""Tests for the new economic impact output modules.""" + +from dataclasses import FrozenInstanceError +from unittest.mock import MagicMock + +import numpy as np +import pandas as pd +import pytest +from microdf import MicroDataFrame + +from policyengine.outputs.change_aggregate import ChangeAggregate, ChangeAggregateType +from policyengine.outputs.country_config import UK_CONFIG, US_CONFIG +from policyengine.outputs.decile_impact import DecileImpact, compute_decile_impacts + +# --------------------------------------------------------------------------- +# Helpers (same pattern as test_intra_decile_impact.py) +# --------------------------------------------------------------------------- + + +def _make_variable_mock(name: str, entity: str) -> MagicMock: + """Create a mock Variable with name and entity attributes.""" + var = MagicMock() + var.name = name + var.entity = entity + return var + + +def _make_sim(household_data: dict, variables: list | None = None) -> MagicMock: + """Create a mock Simulation with household-level data.""" + hh_df = MicroDataFrame( + pd.DataFrame(household_data), + weights="household_weight", + ) + sim = MagicMock() + sim.output_dataset.data.household = hh_df + sim.id = "test-sim" + if variables is not None: + sim.tax_benefit_model_version.variables = variables + return sim + + +# --------------------------------------------------------------------------- +# CountryConfig tests +# --------------------------------------------------------------------------- + + +def test_us_config_is_frozen(): + """US_CONFIG should be immutable.""" + with pytest.raises(FrozenInstanceError): + US_CONFIG.country_id = "uk" + + +def test_uk_config_is_frozen(): + """UK_CONFIG should be immutable.""" + with pytest.raises(FrozenInstanceError): + UK_CONFIG.country_id = "us" + + +def test_us_config_has_correct_country_id(): + assert US_CONFIG.country_id == "us" + + +def test_uk_config_has_correct_country_id(): + assert UK_CONFIG.country_id == "uk" + + +def test_us_config_programs(): + """US_CONFIG should contain expected program keys.""" + expected = { + "income_tax", + "employee_payroll_tax", + "snap", + "tanf", + "ssi", + "social_security", + } + assert set(US_CONFIG.programs.keys()) == expected + + +def test_uk_config_programs(): + """UK_CONFIG should contain expected programme keys.""" + expected = { + "income_tax", + "national_insurance", + "vat", + "council_tax", + "universal_credit", + "child_benefit", + "pension_credit", + "income_support", + "working_tax_credit", + "child_tax_credit", + } + assert set(UK_CONFIG.programs.keys()) == expected + + +def test_country_config_program_structure(): + """Each program entry should have 'entity' and 'is_tax' keys.""" + for name, info in US_CONFIG.programs.items(): + assert "entity" in info, f"US program {name} missing 'entity'" + assert "is_tax" in info, f"US program {name} missing 'is_tax'" + for name, info in UK_CONFIG.programs.items(): + assert "entity" in info, f"UK programme {name} missing 'entity'" + assert "is_tax" in info, f"UK programme {name} missing 'is_tax'" + + +# --------------------------------------------------------------------------- +# DecileImpact tests +# --------------------------------------------------------------------------- + + +def test_decile_impact_variable_not_found(): + """DecileImpact.run() should raise ValueError for a nonexistent variable.""" + variables = [_make_variable_mock("household_net_income", "household")] + sim = _make_sim( + {"household_net_income": [50000.0], "household_weight": [1.0]}, + variables=variables, + ) + + di = DecileImpact.model_construct( + baseline_simulation=sim, + reform_simulation=sim, + income_variable="nonexistent_variable", + entity="household", + decile=1, + ) + with pytest.raises(ValueError, match="not found in model"): + di.run() + + +def test_compute_decile_impacts_returns_10(): + """compute_decile_impacts should return 10 DecileImpact objects by default.""" + n = 100 + incomes = np.linspace(10000, 100000, n) + reform_incomes = incomes + 500 + variables = [_make_variable_mock("household_net_income", "household")] + + baseline = _make_sim( + {"household_net_income": incomes, "household_weight": np.ones(n)}, + variables=variables, + ) + reform = _make_sim( + {"household_net_income": reform_incomes, "household_weight": np.ones(n)}, + variables=variables, + ) + + result = compute_decile_impacts( + baseline, reform, income_variable="household_net_income", entity="household" + ) + + assert len(result.outputs) == 10 + assert len(result.dataframe) == 10 + + # Each decile should have absolute_change ~500 + for di in result.outputs: + assert abs(di.absolute_change - 500.0) < 1e-6 + + +def test_compute_decile_impacts_custom_quantiles(): + """compute_decile_impacts with quantiles=5 should return 5 outputs.""" + n = 100 + incomes = np.linspace(10000, 100000, n) + variables = [_make_variable_mock("household_net_income", "household")] + + sim = _make_sim( + {"household_net_income": incomes, "household_weight": np.ones(n)}, + variables=variables, + ) + + result = compute_decile_impacts( + sim, + sim, + income_variable="household_net_income", + entity="household", + quantiles=5, + ) + + assert len(result.outputs) == 5 + + +# --------------------------------------------------------------------------- +# ChangeAggregate error test +# --------------------------------------------------------------------------- + + +def test_change_aggregate_variable_not_found(): + """ChangeAggregate should raise ValueError for a nonexistent variable.""" + variables = [_make_variable_mock("employment_income", "person")] + sim = _make_sim( + {"household_net_income": [50000.0], "household_weight": [1.0]}, + variables=variables, + ) + + ca = ChangeAggregate.model_construct( + baseline_simulation=sim, + reform_simulation=sim, + variable="nonexistent_variable", + aggregate_type=ChangeAggregateType.COUNT, + ) + with pytest.raises(ValueError, match="not found in model"): + ca.run() From c917c86ca7dcd1fdba4debac1182addcef7e459e Mon Sep 17 00:00:00 2001 From: Anthony Volk Date: Thu, 19 Mar 2026 20:54:05 +0100 Subject: [PATCH 4/5] refactor: Replace CountryConfig with AnalysisStrategy protocol Replace the frozen CountryConfig dataclass with a strategy pattern: a shared economic_impact_analysis() delegates to country-specific strategy functions at five typed extension points (income_variable, budget_variable_names, programs, compute_poverty, compute_inequality). Budget variable entities are now looked up from TaxBenefitModelVersion instead of being hardcoded in config. Co-Authored-By: Claude Opus 4.6 --- src/policyengine/outputs/__init__.py | 16 +- src/policyengine/outputs/analysis_strategy.py | 81 +++++++++ src/policyengine/outputs/budget_summary.py | 11 +- src/policyengine/outputs/country_config.py | 84 --------- src/policyengine/outputs/economic_impact.py | 99 +++++++++++ .../tax_benefit_models/uk/__init__.py | 12 +- .../tax_benefit_models/uk/analysis.py | 162 ++++++++---------- .../tax_benefit_models/us/__init__.py | 12 +- .../tax_benefit_models/us/analysis.py | 153 +++++++---------- tests/test_economic_impact_outputs.py | 61 ++----- 10 files changed, 373 insertions(+), 318 deletions(-) create mode 100644 src/policyengine/outputs/analysis_strategy.py delete mode 100644 src/policyengine/outputs/country_config.py create mode 100644 src/policyengine/outputs/economic_impact.py diff --git a/src/policyengine/outputs/__init__.py b/src/policyengine/outputs/__init__.py index f805310d..d6b68acd 100644 --- a/src/policyengine/outputs/__init__.py +++ b/src/policyengine/outputs/__init__.py @@ -1,5 +1,10 @@ from policyengine.core import Output, OutputCollection from policyengine.outputs.aggregate import Aggregate, AggregateType +from policyengine.outputs.analysis_strategy import ( + AnalysisStrategy, + InequalityResult, + PovertyResult, +) from policyengine.outputs.budget_summary import ( BudgetSummaryItem, compute_budget_summary, @@ -16,12 +21,14 @@ ConstituencyImpact, compute_uk_constituency_impacts, ) -from policyengine.outputs.country_config import UK_CONFIG, US_CONFIG, CountryConfig from policyengine.outputs.decile_impact import ( DecileImpact, calculate_decile_impacts, compute_decile_impacts, ) +from policyengine.outputs.economic_impact import ( + economic_impact_analysis, +) from policyengine.outputs.inequality import ( UK_INEQUALITY_INCOME_VARIABLE, US_INEQUALITY_INCOME_VARIABLE, @@ -99,7 +106,8 @@ "compute_decile_impacts", "compute_program_statistics", "PolicyReformAnalysis", - "CountryConfig", - "US_CONFIG", - "UK_CONFIG", + "AnalysisStrategy", + "PovertyResult", + "InequalityResult", + "economic_impact_analysis", ] diff --git a/src/policyengine/outputs/analysis_strategy.py b/src/policyengine/outputs/analysis_strategy.py new file mode 100644 index 00000000..fd46f808 --- /dev/null +++ b/src/policyengine/outputs/analysis_strategy.py @@ -0,0 +1,81 @@ +"""Strategy protocol and result types for economic impact analysis.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Protocol, runtime_checkable + +from pydantic import BaseModel, ConfigDict + +from policyengine.core import OutputCollection +from policyengine.outputs.inequality import Inequality +from policyengine.outputs.poverty import Poverty + +if TYPE_CHECKING: + from policyengine.core.simulation import Simulation + + +class PovertyResult(BaseModel): + """Standardised poverty result returned by a country strategy.""" + + model_config = ConfigDict(arbitrary_types_allowed=True) + + baseline_poverty: OutputCollection[Poverty] + reform_poverty: OutputCollection[Poverty] + baseline_poverty_by_age: OutputCollection[Poverty] | None = None + reform_poverty_by_age: OutputCollection[Poverty] | None = None + baseline_poverty_by_gender: OutputCollection[Poverty] | None = None + reform_poverty_by_gender: OutputCollection[Poverty] | None = None + baseline_poverty_by_race: OutputCollection[Poverty] | None = None + reform_poverty_by_race: OutputCollection[Poverty] | None = None + + +class InequalityResult(BaseModel): + """Standardised inequality result returned by a country strategy.""" + + model_config = ConfigDict(arbitrary_types_allowed=True) + + baseline_inequality: Inequality + reform_inequality: Inequality + + +@runtime_checkable +class AnalysisStrategy(Protocol): + """Country-specific strategy for economic impact analysis. + + Each property/method corresponds to a standardised extension point + in the shared analysis pipeline. + """ + + @property + def income_variable(self) -> str: + """Primary income variable for decile / intra-decile analysis.""" + ... + + @property + def budget_variable_names(self) -> list[str]: + """Variable names for budget summary. + + Entities are looked up from the tax-benefit system at runtime. + """ + ... + + @property + def programs(self) -> dict[str, dict]: + """Program definitions: name -> {"entity": str, "is_tax": bool}.""" + ... + + def compute_poverty( + self, + baseline: Simulation, + reform: Simulation, + ) -> PovertyResult: + """Compute all poverty metrics (overall + demographic breakdowns).""" + ... + + def compute_inequality( + self, + baseline: Simulation, + reform: Simulation, + ) -> InequalityResult: + """Compute inequality metrics.""" + ... diff --git a/src/policyengine/outputs/budget_summary.py b/src/policyengine/outputs/budget_summary.py index 79d1655e..eccea659 100644 --- a/src/policyengine/outputs/budget_summary.py +++ b/src/policyengine/outputs/budget_summary.py @@ -54,21 +54,24 @@ def run(self): def compute_budget_summary( baseline_simulation: Simulation, reform_simulation: Simulation, - variables: dict[str, str], + variable_names: list[str], ) -> OutputCollection[BudgetSummaryItem]: """Compute budget totals for each variable under baseline and reform. Args: baseline_simulation: Already-run baseline simulation. reform_simulation: Already-run reform simulation. - variables: Mapping of variable name to entity, - e.g. ``{"household_tax": "household"}``. + variable_names: Variable names to aggregate. The entity for each + variable is looked up from the tax-benefit model version + attached to the baseline simulation. Returns: OutputCollection of BudgetSummaryItem objects with a DataFrame. """ + tbm = baseline_simulation.tax_benefit_model_version results: list[BudgetSummaryItem] = [] - for var_name, entity in variables.items(): + for var_name in variable_names: + entity = tbm.get_variable(var_name).entity item = BudgetSummaryItem( baseline_simulation=baseline_simulation, reform_simulation=reform_simulation, diff --git a/src/policyengine/outputs/country_config.py b/src/policyengine/outputs/country_config.py deleted file mode 100644 index f8f0c660..00000000 --- a/src/policyengine/outputs/country_config.py +++ /dev/null @@ -1,84 +0,0 @@ -"""Country configuration strategy — holds all country-specific parameters.""" - -from __future__ import annotations - -from dataclasses import dataclass, field - - -@dataclass(frozen=True) -class CountryConfig: - """All country-specific parameters needed by compute functions. - - Individual compute functions read the fields they need from this - config rather than accepting a ``country_id`` string and branching. - """ - - country_id: str - income_variable: str - programs: dict[str, dict] = field(default_factory=dict) - budget_variables: dict[str, str] = field(default_factory=dict) - poverty_variables: dict[str, str] = field(default_factory=dict) - poverty_entity: str = "person" - poverty_breakdowns: tuple[str, ...] = () - inequality_income_variable: str | None = None - inequality_entity: str = "household" - - -US_CONFIG = CountryConfig( - country_id="us", - income_variable="household_net_income", - programs={ - "income_tax": {"entity": "tax_unit", "is_tax": True}, - "employee_payroll_tax": {"entity": "person", "is_tax": True}, - "snap": {"entity": "spm_unit", "is_tax": False}, - "tanf": {"entity": "spm_unit", "is_tax": False}, - "ssi": {"entity": "spm_unit", "is_tax": False}, - "social_security": {"entity": "person", "is_tax": False}, - }, - budget_variables={ - "household_tax": "household", - "household_benefits": "household", - "household_net_income": "household", - "household_state_income_tax": "tax_unit", - }, - poverty_variables={ - "spm": "spm_unit_is_in_spm_poverty", - "spm_deep": "spm_unit_is_in_deep_spm_poverty", - }, - poverty_entity="person", - poverty_breakdowns=("age", "gender", "race"), - inequality_income_variable="household_net_income", - inequality_entity="household", -) - -UK_CONFIG = CountryConfig( - country_id="uk", - income_variable="equiv_hbai_household_net_income", - programs={ - "income_tax": {"entity": "person", "is_tax": True}, - "national_insurance": {"entity": "person", "is_tax": True}, - "vat": {"entity": "household", "is_tax": True}, - "council_tax": {"entity": "household", "is_tax": True}, - "universal_credit": {"entity": "person", "is_tax": False}, - "child_benefit": {"entity": "person", "is_tax": False}, - "pension_credit": {"entity": "person", "is_tax": False}, - "income_support": {"entity": "person", "is_tax": False}, - "working_tax_credit": {"entity": "person", "is_tax": False}, - "child_tax_credit": {"entity": "person", "is_tax": False}, - }, - budget_variables={ - "household_tax": "household", - "household_benefits": "household", - "household_net_income": "household", - }, - poverty_variables={ - "absolute_bhc": "in_poverty_bhc", - "absolute_ahc": "in_poverty_ahc", - "relative_bhc": "in_relative_poverty_bhc", - "relative_ahc": "in_relative_poverty_ahc", - }, - poverty_entity="person", - poverty_breakdowns=("age", "gender"), - inequality_income_variable="equiv_hbai_household_net_income", - inequality_entity="household", -) diff --git a/src/policyengine/outputs/economic_impact.py b/src/policyengine/outputs/economic_impact.py new file mode 100644 index 00000000..01931bcd --- /dev/null +++ b/src/policyengine/outputs/economic_impact.py @@ -0,0 +1,99 @@ +"""Shared cross-country economic impact analysis.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +import numpy as np + +from policyengine.outputs.budget_summary import compute_budget_summary +from policyengine.outputs.decile_impact import compute_decile_impacts +from policyengine.outputs.intra_decile_impact import compute_intra_decile_impacts +from policyengine.outputs.policy_reform_analysis import PolicyReformAnalysis +from policyengine.outputs.program_statistics import compute_program_statistics + +if TYPE_CHECKING: + from policyengine.core.simulation import Simulation + from policyengine.outputs.analysis_strategy import AnalysisStrategy + + +def economic_impact_analysis( + baseline_simulation: Simulation, + reform_simulation: Simulation, + strategy: AnalysisStrategy, +) -> PolicyReformAnalysis: + """Perform comprehensive economic impact analysis of a policy reform. + + Shared implementation that delegates country-specific work to + *strategy* at five standardised extension points. + + Both simulations must already be run (i.e. ``ensure()`` called). + """ + baseline_simulation.ensure() + reform_simulation.ensure() + + # --- shared computations ------------------------------------------------ + + # Decile impacts + decile_impacts = compute_decile_impacts( + baseline_simulation, + reform_simulation, + income_variable=strategy.income_variable, + ) + + # Intra-decile impacts + intra_decile_impacts = compute_intra_decile_impacts( + baseline_simulation, + reform_simulation, + income_variable=strategy.income_variable, + ) + + # Budget summary (entity looked up from TBM inside compute_budget_summary) + budget = compute_budget_summary( + baseline_simulation, + reform_simulation, + strategy.budget_variable_names, + ) + + # Household counts — raw weight sums to avoid MicroSeries double-weighting + hh_weight_baseline = baseline_simulation.output_dataset.data.household[ + "household_weight" + ] + hh_weight_reform = reform_simulation.output_dataset.data.household[ + "household_weight" + ] + household_count_baseline = float(np.array(hh_weight_baseline).sum()) + household_count_reform = float(np.array(hh_weight_reform).sum()) + + # Program statistics + programs = compute_program_statistics( + baseline_simulation, + reform_simulation, + strategy.programs, + ) + + # --- strategy extension points ------------------------------------------ + + poverty = strategy.compute_poverty(baseline_simulation, reform_simulation) + inequality = strategy.compute_inequality(baseline_simulation, reform_simulation) + + # --- assemble result ---------------------------------------------------- + + return PolicyReformAnalysis( + decile_impacts=decile_impacts, + intra_decile_impacts=intra_decile_impacts, + budget_summary=budget, + household_count_baseline=household_count_baseline, + household_count_reform=household_count_reform, + program_statistics=programs, + baseline_poverty=poverty.baseline_poverty, + reform_poverty=poverty.reform_poverty, + baseline_poverty_by_age=poverty.baseline_poverty_by_age, + reform_poverty_by_age=poverty.reform_poverty_by_age, + baseline_poverty_by_gender=poverty.baseline_poverty_by_gender, + reform_poverty_by_gender=poverty.reform_poverty_by_gender, + baseline_poverty_by_race=poverty.baseline_poverty_by_race, + reform_poverty_by_race=poverty.reform_poverty_by_race, + baseline_inequality=inequality.baseline_inequality, + reform_inequality=inequality.reform_inequality, + ) diff --git a/src/policyengine/tax_benefit_models/uk/__init__.py b/src/policyengine/tax_benefit_models/uk/__init__.py index cff18a7e..73be9e70 100644 --- a/src/policyengine/tax_benefit_models/uk/__init__.py +++ b/src/policyengine/tax_benefit_models/uk/__init__.py @@ -5,11 +5,16 @@ if find_spec("policyengine_uk") is not None: from policyengine.core import Dataset from policyengine.core.simulation import Simulation + from policyengine.outputs.analysis_strategy import ( + AnalysisStrategy, + InequalityResult, + PovertyResult, + ) from policyengine.outputs.budget_summary import BudgetSummaryItem - from policyengine.outputs.country_config import UK_CONFIG from policyengine.outputs.policy_reform_analysis import PolicyReformAnalysis from .analysis import ( + UKAnalysisStrategy, UKHouseholdInput, UKHouseholdOutput, calculate_household_impact, @@ -57,7 +62,10 @@ "UKHouseholdOutput", "ProgrammeStatistics", "PolicyReformAnalysis", - "UK_CONFIG", + "UKAnalysisStrategy", + "AnalysisStrategy", + "PovertyResult", + "InequalityResult", ] else: __all__ = [] diff --git a/src/policyengine/tax_benefit_models/uk/analysis.py b/src/policyengine/tax_benefit_models/uk/analysis.py index 04096cfe..284ec38f 100644 --- a/src/policyengine/tax_benefit_models/uk/analysis.py +++ b/src/policyengine/tax_benefit_models/uk/analysis.py @@ -10,14 +10,17 @@ from policyengine.core import Simulation from policyengine.core.policy import Policy -from policyengine.outputs.budget_summary import compute_budget_summary -from policyengine.outputs.country_config import UK_CONFIG -from policyengine.outputs.decile_impact import compute_decile_impacts +from policyengine.outputs.analysis_strategy import InequalityResult, PovertyResult +from policyengine.outputs.economic_impact import ( + economic_impact_analysis as _shared_economic_impact_analysis, +) from policyengine.outputs.inequality import calculate_uk_inequality -from policyengine.outputs.intra_decile_impact import compute_intra_decile_impacts from policyengine.outputs.policy_reform_analysis import PolicyReformAnalysis -from policyengine.outputs.poverty import calculate_uk_poverty_rates -from policyengine.outputs.program_statistics import compute_program_statistics +from policyengine.outputs.poverty import ( + calculate_uk_poverty_by_age, + calculate_uk_poverty_by_gender, + calculate_uk_poverty_rates, +) from .datasets import PolicyEngineUKDataset, UKYearData from .model import uk_latest @@ -165,94 +168,73 @@ def safe_convert(value): ) +# --------------------------------------------------------------------------- +# UK analysis strategy +# --------------------------------------------------------------------------- + + +class UKAnalysisStrategy: + """Country-specific strategy for UK economic impact analysis.""" + + @property + def income_variable(self) -> str: + return "equiv_hbai_household_net_income" + + @property + def budget_variable_names(self) -> list[str]: + return [ + "household_tax", + "household_benefits", + "household_net_income", + ] + + @property + def programs(self) -> dict[str, dict]: + return { + "income_tax": {"entity": "person", "is_tax": True}, + "national_insurance": {"entity": "person", "is_tax": True}, + "vat": {"entity": "household", "is_tax": True}, + "council_tax": {"entity": "household", "is_tax": True}, + "universal_credit": {"entity": "person", "is_tax": False}, + "child_benefit": {"entity": "person", "is_tax": False}, + "pension_credit": {"entity": "person", "is_tax": False}, + "income_support": {"entity": "person", "is_tax": False}, + "working_tax_credit": {"entity": "person", "is_tax": False}, + "child_tax_credit": {"entity": "person", "is_tax": False}, + } + + def compute_poverty( + self, + baseline: Simulation, + reform: Simulation, + ) -> PovertyResult: + return PovertyResult( + baseline_poverty=calculate_uk_poverty_rates(baseline), + reform_poverty=calculate_uk_poverty_rates(reform), + baseline_poverty_by_age=calculate_uk_poverty_by_age(baseline), + reform_poverty_by_age=calculate_uk_poverty_by_age(reform), + baseline_poverty_by_gender=calculate_uk_poverty_by_gender(baseline), + reform_poverty_by_gender=calculate_uk_poverty_by_gender(reform), + ) + + def compute_inequality( + self, + baseline: Simulation, + reform: Simulation, + ) -> InequalityResult: + return InequalityResult( + baseline_inequality=calculate_uk_inequality(baseline), + reform_inequality=calculate_uk_inequality(reform), + ) + + def economic_impact_analysis( baseline_simulation: Simulation, reform_simulation: Simulation, ) -> PolicyReformAnalysis: - """Perform comprehensive economic impact analysis of a UK policy reform. - - Calls individual compute functions and assembles the results into - a single ``PolicyReformAnalysis`` object. - - Both simulations must already be run (i.e. ``ensure()`` called). - """ - baseline_simulation.ensure() - reform_simulation.ensure() - - config = UK_CONFIG - - # Decile impacts - decile_impacts = compute_decile_impacts( + """Perform comprehensive economic impact analysis of a UK policy reform.""" + return _shared_economic_impact_analysis( baseline_simulation, reform_simulation, - income_variable=config.income_variable, - ) - - # Intra-decile impacts - intra_decile_impacts = compute_intra_decile_impacts( - baseline_simulation, - reform_simulation, - income_variable=config.income_variable, - ) - - # Budget summary - budget = compute_budget_summary( - baseline_simulation, - reform_simulation, - config.budget_variables, - ) - - # Household counts — raw weight sums to avoid MicroSeries double-weighting - import numpy as np - - hh_weight_baseline = baseline_simulation.output_dataset.data.household[ - "household_weight" - ] - hh_weight_reform = reform_simulation.output_dataset.data.household[ - "household_weight" - ] - household_count_baseline = float(np.array(hh_weight_baseline).sum()) - household_count_reform = float(np.array(hh_weight_reform).sum()) - - # Programme statistics - programmes = compute_program_statistics( - baseline_simulation, - reform_simulation, - config.programs, - ) - - # Poverty — overall - baseline_poverty = calculate_uk_poverty_rates(baseline_simulation) - reform_poverty = calculate_uk_poverty_rates(reform_simulation) - - # Poverty by demographics - from policyengine.outputs.poverty import ( - calculate_uk_poverty_by_age, - calculate_uk_poverty_by_gender, - ) - - baseline_poverty_by_age = calculate_uk_poverty_by_age(baseline_simulation) - reform_poverty_by_age = calculate_uk_poverty_by_age(reform_simulation) - baseline_poverty_by_gender = calculate_uk_poverty_by_gender(baseline_simulation) - reform_poverty_by_gender = calculate_uk_poverty_by_gender(reform_simulation) - - # Inequality - baseline_inequality = calculate_uk_inequality(baseline_simulation) - reform_inequality = calculate_uk_inequality(reform_simulation) - - return PolicyReformAnalysis( - decile_impacts=decile_impacts, - intra_decile_impacts=intra_decile_impacts, - budget_summary=budget, - household_count_baseline=household_count_baseline, - household_count_reform=household_count_reform, - program_statistics=programmes, - baseline_poverty=baseline_poverty, - reform_poverty=reform_poverty, - baseline_poverty_by_age=baseline_poverty_by_age, - reform_poverty_by_age=reform_poverty_by_age, - baseline_poverty_by_gender=baseline_poverty_by_gender, - reform_poverty_by_gender=reform_poverty_by_gender, - baseline_inequality=baseline_inequality, - reform_inequality=reform_inequality, + UKAnalysisStrategy(), ) diff --git a/src/policyengine/tax_benefit_models/us/__init__.py b/src/policyengine/tax_benefit_models/us/__init__.py index f1fc3afd..bf92cbaf 100644 --- a/src/policyengine/tax_benefit_models/us/__init__.py +++ b/src/policyengine/tax_benefit_models/us/__init__.py @@ -5,11 +5,16 @@ if find_spec("policyengine_us") is not None: from policyengine.core import Dataset from policyengine.core.simulation import Simulation + from policyengine.outputs.analysis_strategy import ( + AnalysisStrategy, + InequalityResult, + PovertyResult, + ) from policyengine.outputs.budget_summary import BudgetSummaryItem - from policyengine.outputs.country_config import US_CONFIG from policyengine.outputs.policy_reform_analysis import PolicyReformAnalysis from .analysis import ( + USAnalysisStrategy, USHouseholdInput, USHouseholdOutput, calculate_household_impact, @@ -57,7 +62,10 @@ "USHouseholdOutput", "ProgramStatistics", "PolicyReformAnalysis", - "US_CONFIG", + "USAnalysisStrategy", + "AnalysisStrategy", + "PovertyResult", + "InequalityResult", ] else: __all__ = [] diff --git a/src/policyengine/tax_benefit_models/us/analysis.py b/src/policyengine/tax_benefit_models/us/analysis.py index b83646f3..4e8540b6 100644 --- a/src/policyengine/tax_benefit_models/us/analysis.py +++ b/src/policyengine/tax_benefit_models/us/analysis.py @@ -4,18 +4,17 @@ from pathlib import Path from typing import Any -import numpy as np import pandas as pd from microdf import MicroDataFrame from pydantic import BaseModel, Field from policyengine.core import Simulation from policyengine.core.policy import Policy -from policyengine.outputs.budget_summary import compute_budget_summary -from policyengine.outputs.country_config import US_CONFIG -from policyengine.outputs.decile_impact import compute_decile_impacts +from policyengine.outputs.analysis_strategy import InequalityResult, PovertyResult +from policyengine.outputs.economic_impact import ( + economic_impact_analysis as _shared_economic_impact_analysis, +) from policyengine.outputs.inequality import calculate_us_inequality -from policyengine.outputs.intra_decile_impact import compute_intra_decile_impacts from policyengine.outputs.policy_reform_analysis import PolicyReformAnalysis from policyengine.outputs.poverty import ( calculate_us_poverty_by_age, @@ -23,7 +22,6 @@ calculate_us_poverty_by_race, calculate_us_poverty_rates, ) -from policyengine.outputs.program_statistics import compute_program_statistics from .datasets import PolicyEngineUSDataset, USYearData from .model import us_latest @@ -187,91 +185,72 @@ def extract_entity_outputs( ) +# --------------------------------------------------------------------------- +# US analysis strategy +# --------------------------------------------------------------------------- + + +class USAnalysisStrategy: + """Country-specific strategy for US economic impact analysis.""" + + @property + def income_variable(self) -> str: + return "household_net_income" + + @property + def budget_variable_names(self) -> list[str]: + return [ + "household_tax", + "household_benefits", + "household_net_income", + "household_state_income_tax", + ] + + @property + def programs(self) -> dict[str, dict]: + return { + "income_tax": {"entity": "tax_unit", "is_tax": True}, + "employee_payroll_tax": {"entity": "person", "is_tax": True}, + "snap": {"entity": "spm_unit", "is_tax": False}, + "tanf": {"entity": "spm_unit", "is_tax": False}, + "ssi": {"entity": "spm_unit", "is_tax": False}, + "social_security": {"entity": "person", "is_tax": False}, + } + + def compute_poverty( + self, + baseline: Simulation, + reform: Simulation, + ) -> PovertyResult: + return PovertyResult( + baseline_poverty=calculate_us_poverty_rates(baseline), + reform_poverty=calculate_us_poverty_rates(reform), + baseline_poverty_by_age=calculate_us_poverty_by_age(baseline), + reform_poverty_by_age=calculate_us_poverty_by_age(reform), + baseline_poverty_by_gender=calculate_us_poverty_by_gender(baseline), + reform_poverty_by_gender=calculate_us_poverty_by_gender(reform), + baseline_poverty_by_race=calculate_us_poverty_by_race(baseline), + reform_poverty_by_race=calculate_us_poverty_by_race(reform), + ) + + def compute_inequality( + self, + baseline: Simulation, + reform: Simulation, + ) -> InequalityResult: + return InequalityResult( + baseline_inequality=calculate_us_inequality(baseline), + reform_inequality=calculate_us_inequality(reform), + ) + + def economic_impact_analysis( baseline_simulation: Simulation, reform_simulation: Simulation, ) -> PolicyReformAnalysis: - """Perform comprehensive economic impact analysis of a US policy reform. - - Calls individual compute functions and assembles the results into - a single ``PolicyReformAnalysis`` object. - - Both simulations must already be run (i.e. ``ensure()`` called). - """ - baseline_simulation.ensure() - reform_simulation.ensure() - - config = US_CONFIG - - # Decile impacts - decile_impacts = compute_decile_impacts( - baseline_simulation, - reform_simulation, - income_variable=config.income_variable, - ) - - # Intra-decile impacts - intra_decile_impacts = compute_intra_decile_impacts( + """Perform comprehensive economic impact analysis of a US policy reform.""" + return _shared_economic_impact_analysis( baseline_simulation, reform_simulation, - income_variable=config.income_variable, - ) - - # Budget summary - budget = compute_budget_summary( - baseline_simulation, - reform_simulation, - config.budget_variables, - ) - - # Household counts — raw weight sums to avoid MicroSeries double-weighting - hh_weight_baseline = baseline_simulation.output_dataset.data.household[ - "household_weight" - ] - hh_weight_reform = reform_simulation.output_dataset.data.household[ - "household_weight" - ] - household_count_baseline = float(np.array(hh_weight_baseline).sum()) - household_count_reform = float(np.array(hh_weight_reform).sum()) - - # Program statistics - programs = compute_program_statistics( - baseline_simulation, - reform_simulation, - config.programs, - ) - - # Poverty — overall - baseline_poverty = calculate_us_poverty_rates(baseline_simulation) - reform_poverty = calculate_us_poverty_rates(reform_simulation) - - # Poverty by demographics - baseline_poverty_by_age = calculate_us_poverty_by_age(baseline_simulation) - reform_poverty_by_age = calculate_us_poverty_by_age(reform_simulation) - baseline_poverty_by_gender = calculate_us_poverty_by_gender(baseline_simulation) - reform_poverty_by_gender = calculate_us_poverty_by_gender(reform_simulation) - baseline_poverty_by_race = calculate_us_poverty_by_race(baseline_simulation) - reform_poverty_by_race = calculate_us_poverty_by_race(reform_simulation) - - # Inequality - baseline_inequality = calculate_us_inequality(baseline_simulation) - reform_inequality = calculate_us_inequality(reform_simulation) - - return PolicyReformAnalysis( - decile_impacts=decile_impacts, - intra_decile_impacts=intra_decile_impacts, - budget_summary=budget, - household_count_baseline=household_count_baseline, - household_count_reform=household_count_reform, - program_statistics=programs, - baseline_poverty=baseline_poverty, - reform_poverty=reform_poverty, - baseline_poverty_by_age=baseline_poverty_by_age, - reform_poverty_by_age=reform_poverty_by_age, - baseline_poverty_by_gender=baseline_poverty_by_gender, - reform_poverty_by_gender=reform_poverty_by_gender, - baseline_poverty_by_race=baseline_poverty_by_race, - reform_poverty_by_race=reform_poverty_by_race, - baseline_inequality=baseline_inequality, - reform_inequality=reform_inequality, + USAnalysisStrategy(), ) diff --git a/tests/test_economic_impact_outputs.py b/tests/test_economic_impact_outputs.py index 83207d72..21f1c4d8 100644 --- a/tests/test_economic_impact_outputs.py +++ b/tests/test_economic_impact_outputs.py @@ -1,6 +1,5 @@ """Tests for the new economic impact output modules.""" -from dataclasses import FrozenInstanceError from unittest.mock import MagicMock import numpy as np @@ -8,8 +7,8 @@ import pytest from microdf import MicroDataFrame +from policyengine.outputs.analysis_strategy import AnalysisStrategy from policyengine.outputs.change_aggregate import ChangeAggregate, ChangeAggregateType -from policyengine.outputs.country_config import UK_CONFIG, US_CONFIG from policyengine.outputs.decile_impact import DecileImpact, compute_decile_impacts # --------------------------------------------------------------------------- @@ -40,32 +39,15 @@ def _make_sim(household_data: dict, variables: list | None = None) -> MagicMock: # --------------------------------------------------------------------------- -# CountryConfig tests +# AnalysisStrategy tests # --------------------------------------------------------------------------- -def test_us_config_is_frozen(): - """US_CONFIG should be immutable.""" - with pytest.raises(FrozenInstanceError): - US_CONFIG.country_id = "uk" +def test_us_strategy_programs(): + """USAnalysisStrategy should contain expected program keys.""" + from policyengine.tax_benefit_models.us.analysis import USAnalysisStrategy - -def test_uk_config_is_frozen(): - """UK_CONFIG should be immutable.""" - with pytest.raises(FrozenInstanceError): - UK_CONFIG.country_id = "us" - - -def test_us_config_has_correct_country_id(): - assert US_CONFIG.country_id == "us" - - -def test_uk_config_has_correct_country_id(): - assert UK_CONFIG.country_id == "uk" - - -def test_us_config_programs(): - """US_CONFIG should contain expected program keys.""" + strategy = USAnalysisStrategy() expected = { "income_tax", "employee_payroll_tax", @@ -74,34 +56,23 @@ def test_us_config_programs(): "ssi", "social_security", } - assert set(US_CONFIG.programs.keys()) == expected + assert set(strategy.programs.keys()) == expected -def test_uk_config_programs(): - """UK_CONFIG should contain expected programme keys.""" - expected = { - "income_tax", - "national_insurance", - "vat", - "council_tax", - "universal_credit", - "child_benefit", - "pension_credit", - "income_support", - "working_tax_credit", - "child_tax_credit", - } - assert set(UK_CONFIG.programs.keys()) == expected +def test_us_strategy_conforms_to_protocol(): + """USAnalysisStrategy should satisfy the AnalysisStrategy protocol.""" + from policyengine.tax_benefit_models.us.analysis import USAnalysisStrategy + + assert isinstance(USAnalysisStrategy(), AnalysisStrategy) -def test_country_config_program_structure(): +def test_us_strategy_program_structure(): """Each program entry should have 'entity' and 'is_tax' keys.""" - for name, info in US_CONFIG.programs.items(): + from policyengine.tax_benefit_models.us.analysis import USAnalysisStrategy + + for name, info in USAnalysisStrategy().programs.items(): assert "entity" in info, f"US program {name} missing 'entity'" assert "is_tax" in info, f"US program {name} missing 'is_tax'" - for name, info in UK_CONFIG.programs.items(): - assert "entity" in info, f"UK programme {name} missing 'entity'" - assert "is_tax" in info, f"UK programme {name} missing 'is_tax'" # --------------------------------------------------------------------------- From 374133555b86b3b85a536872d1fa32c6cbfef9e0 Mon Sep 17 00:00:00 2001 From: Anthony Volk Date: Thu, 19 Mar 2026 21:44:40 +0100 Subject: [PATCH 5/5] =?UTF-8?q?fix:=20Address=20review=20findings=20?= =?UTF-8?q?=E2=80=94=20ProgramDefinition=20TypedDict,=20isinstance=20guard?= =?UTF-8?q?,=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Introduce ProgramDefinition TypedDict for typed program dicts - Add isinstance guard for AnalysisStrategy in economic_impact_analysis - Add UK strategy tests guarded with pytest.importorskip - Add orchestration test verifying ensure() calls and strategy delegation - Add budget summary entity-lookup-from-TBM test - Use module-level US_STRATEGY/UK_STRATEGY singletons - Update changelog fragment Co-Authored-By: Claude Opus 4.6 --- .../consolidate-economic-impact.added.md | 2 +- src/policyengine/outputs/__init__.py | 2 + src/policyengine/outputs/analysis_strategy.py | 13 +- src/policyengine/outputs/economic_impact.py | 10 +- .../outputs/program_statistics.py | 7 +- .../tax_benefit_models/uk/analysis.py | 33 ++-- .../tax_benefit_models/us/analysis.py | 25 ++- tests/test_economic_impact_outputs.py | 176 +++++++++++++++++- 8 files changed, 234 insertions(+), 34 deletions(-) diff --git a/changelog.d/consolidate-economic-impact.added.md b/changelog.d/consolidate-economic-impact.added.md index b711004d..ae1ea94f 100644 --- a/changelog.d/consolidate-economic-impact.added.md +++ b/changelog.d/consolidate-economic-impact.added.md @@ -1 +1 @@ -Shared compute functions for economic impact analysis: CountryConfig, BudgetSummaryItem, compute_program_statistics, compute_decile_impacts, and PolicyReformAnalysis +Shared compute functions for economic impact analysis: AnalysisStrategy protocol, economic_impact_analysis, BudgetSummaryItem, compute_program_statistics, compute_decile_impacts, and PolicyReformAnalysis diff --git a/src/policyengine/outputs/__init__.py b/src/policyengine/outputs/__init__.py index d6b68acd..f58ace8c 100644 --- a/src/policyengine/outputs/__init__.py +++ b/src/policyengine/outputs/__init__.py @@ -4,6 +4,7 @@ AnalysisStrategy, InequalityResult, PovertyResult, + ProgramDefinition, ) from policyengine.outputs.budget_summary import ( BudgetSummaryItem, @@ -107,6 +108,7 @@ "compute_program_statistics", "PolicyReformAnalysis", "AnalysisStrategy", + "ProgramDefinition", "PovertyResult", "InequalityResult", "economic_impact_analysis", diff --git a/src/policyengine/outputs/analysis_strategy.py b/src/policyengine/outputs/analysis_strategy.py index fd46f808..b19e832c 100644 --- a/src/policyengine/outputs/analysis_strategy.py +++ b/src/policyengine/outputs/analysis_strategy.py @@ -2,7 +2,7 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Protocol, runtime_checkable +from typing import TYPE_CHECKING, Protocol, TypedDict, runtime_checkable from pydantic import BaseModel, ConfigDict @@ -14,6 +14,13 @@ from policyengine.core.simulation import Simulation +class ProgramDefinition(TypedDict): + """Definition of a program for program statistics computation.""" + + entity: str + is_tax: bool + + class PovertyResult(BaseModel): """Standardised poverty result returned by a country strategy.""" @@ -60,8 +67,8 @@ def budget_variable_names(self) -> list[str]: ... @property - def programs(self) -> dict[str, dict]: - """Program definitions: name -> {"entity": str, "is_tax": bool}.""" + def programs(self) -> dict[str, ProgramDefinition]: + """Program definitions: name -> ProgramDefinition.""" ... def compute_poverty( diff --git a/src/policyengine/outputs/economic_impact.py b/src/policyengine/outputs/economic_impact.py index 01931bcd..f80ef58b 100644 --- a/src/policyengine/outputs/economic_impact.py +++ b/src/policyengine/outputs/economic_impact.py @@ -6,6 +6,7 @@ import numpy as np +from policyengine.outputs.analysis_strategy import AnalysisStrategy from policyengine.outputs.budget_summary import compute_budget_summary from policyengine.outputs.decile_impact import compute_decile_impacts from policyengine.outputs.intra_decile_impact import compute_intra_decile_impacts @@ -14,7 +15,6 @@ if TYPE_CHECKING: from policyengine.core.simulation import Simulation - from policyengine.outputs.analysis_strategy import AnalysisStrategy def economic_impact_analysis( @@ -29,6 +29,14 @@ def economic_impact_analysis( Both simulations must already be run (i.e. ``ensure()`` called). """ + if not isinstance(strategy, AnalysisStrategy): + raise TypeError( + f"strategy must implement the AnalysisStrategy protocol, " + f"but got {type(strategy).__name__}. Ensure it defines: " + f"income_variable, budget_variable_names, programs, " + f"compute_poverty(), and compute_inequality()." + ) + baseline_simulation.ensure() reform_simulation.ensure() diff --git a/src/policyengine/outputs/program_statistics.py b/src/policyengine/outputs/program_statistics.py index 422de4b8..8f49c9ba 100644 --- a/src/policyengine/outputs/program_statistics.py +++ b/src/policyengine/outputs/program_statistics.py @@ -8,6 +8,7 @@ import pandas as pd from policyengine.core import OutputCollection +from policyengine.outputs.analysis_strategy import ProgramDefinition if TYPE_CHECKING: from policyengine.core.simulation import Simulation @@ -18,15 +19,15 @@ def compute_program_statistics( baseline_simulation: Simulation, reform_simulation: Simulation, - programs: dict[str, dict], + programs: dict[str, ProgramDefinition], ) -> OutputCollection: """Compute per-program statistics for a policy reform. Args: baseline_simulation: Already-run baseline simulation. reform_simulation: Already-run reform simulation. - programs: Mapping of program name to config dict with keys - ``"entity"`` (str) and ``"is_tax"`` (bool). + programs: Mapping of program name to :class:`ProgramDefinition` + with keys ``"entity"`` (str) and ``"is_tax"`` (bool). Example:: { diff --git a/src/policyengine/tax_benefit_models/uk/analysis.py b/src/policyengine/tax_benefit_models/uk/analysis.py index 284ec38f..be77de61 100644 --- a/src/policyengine/tax_benefit_models/uk/analysis.py +++ b/src/policyengine/tax_benefit_models/uk/analysis.py @@ -10,7 +10,11 @@ from policyengine.core import Simulation from policyengine.core.policy import Policy -from policyengine.outputs.analysis_strategy import InequalityResult, PovertyResult +from policyengine.outputs.analysis_strategy import ( + InequalityResult, + PovertyResult, + ProgramDefinition, +) from policyengine.outputs.economic_impact import ( economic_impact_analysis as _shared_economic_impact_analysis, ) @@ -189,18 +193,18 @@ def budget_variable_names(self) -> list[str]: ] @property - def programs(self) -> dict[str, dict]: + def programs(self) -> dict[str, ProgramDefinition]: return { - "income_tax": {"entity": "person", "is_tax": True}, - "national_insurance": {"entity": "person", "is_tax": True}, - "vat": {"entity": "household", "is_tax": True}, - "council_tax": {"entity": "household", "is_tax": True}, - "universal_credit": {"entity": "person", "is_tax": False}, - "child_benefit": {"entity": "person", "is_tax": False}, - "pension_credit": {"entity": "person", "is_tax": False}, - "income_support": {"entity": "person", "is_tax": False}, - "working_tax_credit": {"entity": "person", "is_tax": False}, - "child_tax_credit": {"entity": "person", "is_tax": False}, + "income_tax": ProgramDefinition(entity="person", is_tax=True), + "national_insurance": ProgramDefinition(entity="person", is_tax=True), + "vat": ProgramDefinition(entity="household", is_tax=True), + "council_tax": ProgramDefinition(entity="household", is_tax=True), + "universal_credit": ProgramDefinition(entity="person", is_tax=False), + "child_benefit": ProgramDefinition(entity="person", is_tax=False), + "pension_credit": ProgramDefinition(entity="person", is_tax=False), + "income_support": ProgramDefinition(entity="person", is_tax=False), + "working_tax_credit": ProgramDefinition(entity="person", is_tax=False), + "child_tax_credit": ProgramDefinition(entity="person", is_tax=False), } def compute_poverty( @@ -228,6 +232,9 @@ def compute_inequality( ) +UK_STRATEGY = UKAnalysisStrategy() + + def economic_impact_analysis( baseline_simulation: Simulation, reform_simulation: Simulation, @@ -236,5 +243,5 @@ def economic_impact_analysis( return _shared_economic_impact_analysis( baseline_simulation, reform_simulation, - UKAnalysisStrategy(), + UK_STRATEGY, ) diff --git a/src/policyengine/tax_benefit_models/us/analysis.py b/src/policyengine/tax_benefit_models/us/analysis.py index 4e8540b6..85b5d77c 100644 --- a/src/policyengine/tax_benefit_models/us/analysis.py +++ b/src/policyengine/tax_benefit_models/us/analysis.py @@ -10,7 +10,11 @@ from policyengine.core import Simulation from policyengine.core.policy import Policy -from policyengine.outputs.analysis_strategy import InequalityResult, PovertyResult +from policyengine.outputs.analysis_strategy import ( + InequalityResult, + PovertyResult, + ProgramDefinition, +) from policyengine.outputs.economic_impact import ( economic_impact_analysis as _shared_economic_impact_analysis, ) @@ -207,14 +211,14 @@ def budget_variable_names(self) -> list[str]: ] @property - def programs(self) -> dict[str, dict]: + def programs(self) -> dict[str, ProgramDefinition]: return { - "income_tax": {"entity": "tax_unit", "is_tax": True}, - "employee_payroll_tax": {"entity": "person", "is_tax": True}, - "snap": {"entity": "spm_unit", "is_tax": False}, - "tanf": {"entity": "spm_unit", "is_tax": False}, - "ssi": {"entity": "spm_unit", "is_tax": False}, - "social_security": {"entity": "person", "is_tax": False}, + "income_tax": ProgramDefinition(entity="tax_unit", is_tax=True), + "employee_payroll_tax": ProgramDefinition(entity="person", is_tax=True), + "snap": ProgramDefinition(entity="spm_unit", is_tax=False), + "tanf": ProgramDefinition(entity="spm_unit", is_tax=False), + "ssi": ProgramDefinition(entity="spm_unit", is_tax=False), + "social_security": ProgramDefinition(entity="person", is_tax=False), } def compute_poverty( @@ -244,6 +248,9 @@ def compute_inequality( ) +US_STRATEGY = USAnalysisStrategy() + + def economic_impact_analysis( baseline_simulation: Simulation, reform_simulation: Simulation, @@ -252,5 +259,5 @@ def economic_impact_analysis( return _shared_economic_impact_analysis( baseline_simulation, reform_simulation, - USAnalysisStrategy(), + US_STRATEGY, ) diff --git a/tests/test_economic_impact_outputs.py b/tests/test_economic_impact_outputs.py index 21f1c4d8..f5a1dd33 100644 --- a/tests/test_economic_impact_outputs.py +++ b/tests/test_economic_impact_outputs.py @@ -1,13 +1,14 @@ """Tests for the new economic impact output modules.""" -from unittest.mock import MagicMock +from unittest.mock import MagicMock, patch import numpy as np import pandas as pd import pytest from microdf import MicroDataFrame -from policyengine.outputs.analysis_strategy import AnalysisStrategy +from policyengine.outputs.analysis_strategy import AnalysisStrategy, ProgramDefinition +from policyengine.outputs.budget_summary import compute_budget_summary from policyengine.outputs.change_aggregate import ChangeAggregate, ChangeAggregateType from policyengine.outputs.decile_impact import DecileImpact, compute_decile_impacts @@ -39,7 +40,7 @@ def _make_sim(household_data: dict, variables: list | None = None) -> MagicMock: # --------------------------------------------------------------------------- -# AnalysisStrategy tests +# US AnalysisStrategy tests # --------------------------------------------------------------------------- @@ -67,7 +68,7 @@ def test_us_strategy_conforms_to_protocol(): def test_us_strategy_program_structure(): - """Each program entry should have 'entity' and 'is_tax' keys.""" + """Each program entry should be a ProgramDefinition with 'entity' and 'is_tax'.""" from policyengine.tax_benefit_models.us.analysis import USAnalysisStrategy for name, info in USAnalysisStrategy().programs.items(): @@ -75,6 +76,173 @@ def test_us_strategy_program_structure(): assert "is_tax" in info, f"US program {name} missing 'is_tax'" +# --------------------------------------------------------------------------- +# UK AnalysisStrategy tests (conditional on policyengine_uk being installed) +# --------------------------------------------------------------------------- + +uk_installed = pytest.importorskip( + "policyengine_uk", reason="policyengine_uk not installed" +) + + +def test_uk_strategy_programs(): + """UKAnalysisStrategy should contain expected programme keys.""" + from policyengine.tax_benefit_models.uk.analysis import UKAnalysisStrategy + + expected = { + "income_tax", + "national_insurance", + "vat", + "council_tax", + "universal_credit", + "child_benefit", + "pension_credit", + "income_support", + "working_tax_credit", + "child_tax_credit", + } + assert set(UKAnalysisStrategy().programs.keys()) == expected + + +def test_uk_strategy_conforms_to_protocol(): + """UKAnalysisStrategy should satisfy the AnalysisStrategy protocol.""" + from policyengine.tax_benefit_models.uk.analysis import UKAnalysisStrategy + + assert isinstance(UKAnalysisStrategy(), AnalysisStrategy) + + +def test_uk_strategy_program_structure(): + """Each programme entry should be a ProgramDefinition with 'entity' and 'is_tax'.""" + from policyengine.tax_benefit_models.uk.analysis import UKAnalysisStrategy + + for name, info in UKAnalysisStrategy().programs.items(): + assert "entity" in info, f"UK programme {name} missing 'entity'" + assert "is_tax" in info, f"UK programme {name} missing 'is_tax'" + + +# --------------------------------------------------------------------------- +# Shared economic_impact_analysis tests +# --------------------------------------------------------------------------- + + +def test_economic_impact_analysis_rejects_bad_strategy(): + """economic_impact_analysis should raise TypeError for non-strategy objects.""" + from policyengine.outputs.economic_impact import economic_impact_analysis + + sim = MagicMock() + with pytest.raises(TypeError, match="AnalysisStrategy protocol"): + economic_impact_analysis(sim, sim, "not_a_strategy") + + +def test_economic_impact_analysis_calls_ensure(): + """economic_impact_analysis should call ensure() on both simulations.""" + from policyengine.outputs.economic_impact import economic_impact_analysis + + baseline = MagicMock() + reform = MagicMock() + + strategy = MagicMock(spec=AnalysisStrategy) + strategy.income_variable = "household_net_income" + strategy.budget_variable_names = ["household_tax"] + strategy.programs = { + "income_tax": ProgramDefinition(entity="tax_unit", is_tax=True) + } + strategy.compute_poverty.return_value = MagicMock() + strategy.compute_inequality.return_value = MagicMock() + + with ( + patch( + "policyengine.outputs.economic_impact.compute_decile_impacts" + ) as mock_decile, + patch( + "policyengine.outputs.economic_impact.compute_intra_decile_impacts" + ) as mock_intra, + patch( + "policyengine.outputs.economic_impact.compute_budget_summary" + ) as mock_budget, + patch( + "policyengine.outputs.economic_impact.compute_program_statistics" + ) as mock_prog, + patch("policyengine.outputs.economic_impact.PolicyReformAnalysis"), + ): + economic_impact_analysis(baseline, reform, strategy) + + baseline.ensure.assert_called_once() + reform.ensure.assert_called_once() + mock_decile.assert_called_once() + mock_intra.assert_called_once() + mock_budget.assert_called_once() + mock_prog.assert_called_once() + strategy.compute_poverty.assert_called_once_with(baseline, reform) + strategy.compute_inequality.assert_called_once_with(baseline, reform) + + +# --------------------------------------------------------------------------- +# compute_budget_summary tests +# --------------------------------------------------------------------------- + + +def _make_budget_sim(variable_data: dict, variables: list) -> MagicMock: + """Create a mock simulation for budget summary testing.""" + sim = MagicMock() + sim.output_dataset.data.household = MicroDataFrame( + pd.DataFrame(variable_data), + weights="household_weight", + ) + sim.id = "test-budget-sim" + sim.tax_benefit_model_version.variables = variables + + def get_variable(name): + for v in variables: + if v.name == name: + return v + raise ValueError(f"Variable '{name}' not found in model") + + sim.tax_benefit_model_version.get_variable = get_variable + return sim + + +def test_compute_budget_summary_looks_up_entity_from_tbm(): + """compute_budget_summary should resolve entity from TBM, not from caller.""" + variables = [ + _make_variable_mock("household_tax", "household"), + _make_variable_mock("household_benefits", "household"), + ] + sim = _make_budget_sim( + { + "household_tax": [5000.0], + "household_benefits": [2000.0], + "household_weight": [1.0], + }, + variables, + ) + + # Patch BudgetSummaryItem + OutputCollection to bypass Pydantic validation + with ( + patch("policyengine.outputs.budget_summary.BudgetSummaryItem") as MockBSI, + patch("policyengine.outputs.budget_summary.OutputCollection"), + ): + MockBSI.return_value = MagicMock() + compute_budget_summary(sim, sim, ["household_tax", "household_benefits"]) + + assert MockBSI.call_count == 2 + calls = MockBSI.call_args_list + assert calls[0].kwargs["entity"] == "household" + assert calls[1].kwargs["entity"] == "household" + + +def test_compute_budget_summary_variable_not_found(): + """compute_budget_summary should raise ValueError for unknown variable.""" + variables = [_make_variable_mock("household_tax", "household")] + sim = _make_budget_sim( + {"household_tax": [5000.0], "household_weight": [1.0]}, + variables, + ) + + with pytest.raises(ValueError, match="not found in model"): + compute_budget_summary(sim, sim, ["nonexistent_variable"]) + + # --------------------------------------------------------------------------- # DecileImpact tests # ---------------------------------------------------------------------------