diff --git a/changelog.d/align-ssi-takeup-to-reported.changed.md b/changelog.d/align-ssi-takeup-to-reported.changed.md new file mode 100644 index 00000000..3f3e1126 --- /dev/null +++ b/changelog.d/align-ssi-takeup-to-reported.changed.md @@ -0,0 +1 @@ +Align SSI takeup and disability flags to CPS-reported receipt. diff --git a/docs/calibration_internals.ipynb b/docs/calibration_internals.ipynb index 843ddf5f..8f7a0f5b 100644 --- a/docs/calibration_internals.ipynb +++ b/docs/calibration_internals.ipynb @@ -775,9 +775,9 @@ "source": [ "raw[\"original_value\"] = raw[\"value\"].copy()\n", "raw[\"uprating_factor\"] = raw.apply(\n", - " lambda r: builder._get_uprating_info(\n", - " r[\"variable\"], r[\"period\"], uprating_factors\n", - " )[0],\n", + " lambda r: builder._get_uprating_info(r[\"variable\"], r[\"period\"], uprating_factors)[\n", + " 0\n", + " ],\n", " axis=1,\n", ")\n", "raw[\"value\"] = raw[\"original_value\"] * raw[\"uprating_factor\"]\n", @@ -822,9 +822,7 @@ " for fips, abbr in sample_states.items():\n", " cd_state = cd_domain[\n", " cd_domain[\"geographic_id\"].apply(\n", - " lambda g, s=fips: int(g) // 100 == s\n", - " if g not in (\"US\",)\n", - " else False\n", + " lambda g, s=fips: int(g) // 100 == s if g not in (\"US\",) else False\n", " )\n", " ]\n", " if cd_state.empty:\n", @@ -881,9 +879,7 @@ " for fips, abbr in sorted(STATE_CODES.items()):\n", " cd_rows = cd_result[\n", " cd_result[\"geographic_id\"].apply(\n", - " lambda g, s=fips: int(g) // 100 == s\n", - " if g not in (\"US\",)\n", - " else False\n", + " lambda g, s=fips: int(g) // 100 == s if g not in (\"US\",) else False\n", " )\n", " ]\n", " if cd_rows.empty:\n", diff --git a/policyengine_us_data/datasets/cps/cps.py b/policyengine_us_data/datasets/cps/cps.py index 418d7396..d0ef0fd0 100644 --- a/policyengine_us_data/datasets/cps/cps.py +++ b/policyengine_us_data/datasets/cps/cps.py @@ -15,6 +15,10 @@ from microimpute.models.qrf import QRF import logging from policyengine_us_data.parameters 
import load_take_up_rate +from policyengine_us_data.datasets.cps.takeup import ( + align_reported_ssi_disability, + prioritize_reported_recipients, +) from policyengine_us_data.utils.randomness import seeded_rng @@ -224,19 +228,10 @@ def add_takeup(self): # SNAP: prioritize reported recipients rng = seeded_rng("takes_up_snap_if_eligible") reported_snap = data["snap_reported"] > 0 - - # Calculate adjusted rate for non-reporters to hit target - n_snap_reporters = reported_snap.sum() - n_snap_non_reporters = (~reported_snap).sum() - target_snap_takeup_count = int(snap_rate * n_spm_units) - remaining_snap_needed = max(0, target_snap_takeup_count - n_snap_reporters) - snap_non_reporter_rate = ( - remaining_snap_needed / n_snap_non_reporters if n_snap_non_reporters > 0 else 0 - ) - - # Assign: all reporters + adjusted rate for non-reporters - data["takes_up_snap_if_eligible"] = reported_snap | ( - (~reported_snap) & (rng.random(n_spm_units) < snap_non_reporter_rate) + data["takes_up_snap_if_eligible"] = prioritize_reported_recipients( + reported_snap, + snap_rate, + rng.random(n_spm_units), ) # ACA @@ -270,19 +265,10 @@ def add_takeup(self): # SSI: prioritize reported recipients rng = seeded_rng("takes_up_ssi_if_eligible") reported_ssi = data["ssi_reported"] > 0 - - # Calculate adjusted rate for non-reporters to hit target - n_ssi_reporters = reported_ssi.sum() - n_ssi_non_reporters = (~reported_ssi).sum() - target_ssi_takeup_count = int(ssi_rate * n_persons) - remaining_ssi_needed = max(0, target_ssi_takeup_count - n_ssi_reporters) - ssi_non_reporter_rate = ( - remaining_ssi_needed / n_ssi_non_reporters if n_ssi_non_reporters > 0 else 0 - ) - - # Assign: all reporters + adjusted rate for non-reporters - data["takes_up_ssi_if_eligible"] = reported_ssi | ( - (~reported_ssi) & (rng.random(n_persons) < ssi_non_reporter_rate) + data["takes_up_ssi_if_eligible"] = prioritize_reported_recipients( + reported_ssi, + ssi_rate, + rng.random(n_persons), ) # TANF @@ -340,6 +326,16 @@ 
import numpy as np


def _validate_same_shape(*arrays: np.ndarray) -> None:
    """Raise ``ValueError`` unless all given array-likes share one shape.

    Each argument is passed through ``np.asarray`` before its shape is read,
    so lists and other array-likes are accepted. Calling with no arguments
    is treated as trivially consistent.

    Raises:
        ValueError: If two or more distinct shapes are present. The message
            lists the shapes in argument order.
    """
    shapes = [np.asarray(array).shape for array in arrays]
    if len(set(shapes)) > 1:
        raise ValueError(
            f"All arrays must have the same shape; got shapes {shapes}"
        )


def prioritize_reported_recipients(
    reported_receipt: np.ndarray, target_rate: float, draws: np.ndarray
) -> np.ndarray:
    """Build a take-up mask that always includes reported recipients.

    Every entity flagged in ``reported_receipt`` takes up. Non-reporters
    take up when their draw falls strictly below an adjusted rate chosen so
    that reporters plus the expected number of selected non-reporters reach
    ``target_rate`` of all entities.

    Args:
        reported_receipt: Array-like coerced to bool; True marks an entity
            that reported receipt.
        target_rate: Desired share of all entities that take up.
        draws: One number per entity, same shape as ``reported_receipt``.
            Presumably uniform draws on [0, 1) - confirm against callers.

    Returns:
        Boolean array with the same shape as the inputs.

    Raises:
        ValueError: If ``reported_receipt`` and ``draws`` differ in shape.
    """
    reported_receipt = np.asarray(reported_receipt, dtype=bool)
    draws = np.asarray(draws)
    _validate_same_shape(reported_receipt, draws)

    non_reporters = ~reported_receipt
    n_entities = reported_receipt.size
    n_reporters = int(reported_receipt.sum())
    n_non_reporters = n_entities - n_reporters

    # int() truncates toward zero, so the target count is never rounded up.
    target_takeup_count = int(target_rate * n_entities)
    # When reporters alone already meet the target, nobody else is added.
    remaining_needed = max(0, target_takeup_count - n_reporters)
    if n_non_reporters > 0:
        non_reporter_rate = remaining_needed / n_non_reporters
    else:
        # No non-reporters to draw from; avoid dividing by zero.
        non_reporter_rate = 0.0

    return reported_receipt | (non_reporters & (draws < non_reporter_rate))


def align_reported_ssi_disability(
    is_disabled: np.ndarray, reported_ssi: np.ndarray, ages: np.ndarray
) -> np.ndarray:
    """Mark under-65 reported SSI recipients as disabled.

    Existing True values in ``is_disabled`` are kept; the flag is only ever
    switched on, never off. Entities aged 65 or above are left unchanged
    because the comparison is a strict ``ages < 65``.

    Args:
        is_disabled: Array-like coerced to bool; current disability flags.
        reported_ssi: Array-like coerced to bool; True where SSI receipt
            was reported.
        ages: Array-like of ages, same shape as the other two inputs.

    Returns:
        Boolean array with the same shape as the inputs.

    Raises:
        ValueError: If the three inputs do not share one shape.
    """
    is_disabled = np.asarray(is_disabled, dtype=bool)
    reported_ssi = np.asarray(reported_ssi, dtype=bool)
    ages = np.asarray(ages)
    _validate_same_shape(is_disabled, reported_ssi, ages)

    under_65_reporter = reported_ssi & (ages < 65)
    return is_disabled | under_65_reporter
"""Unit tests for the CPS take-up helper functions."""

import numpy as np
import pytest

from policyengine_us_data.datasets.cps.takeup import (
    align_reported_ssi_disability,
    prioritize_reported_recipients,
)


def test_prioritize_reported_recipients_preserves_reporters():
    """Reporters stay True; non-reporters are filled at the adjusted rate.

    Five entities at a 0.6 target give a target count of 3. Two reporters
    leave 1 needed among 3 non-reporters, so the rate is 1/3 and only the
    non-reporter with draw 0.1 is selected.
    """
    reported = np.array([True, False, False, True, False])
    draws = np.array([0.9, 0.1, 0.7, 0.8, 0.9])

    result = prioritize_reported_recipients(reported, 0.6, draws)

    np.testing.assert_array_equal(
        result,
        np.array([True, True, False, True, False]),
    )


def test_prioritize_reported_recipients_caps_non_reporters_at_zero():
    """No non-reporter is added when reporters already exceed the target.

    Four entities at a 0.5 target give a target count of 2, but 3 reported,
    so the non-reporter rate is 0 and even a low draw is not selected.
    """
    reported = np.array([True, False, True, True])
    draws = np.array([0.2, 0.1, 0.3, 0.4])

    result = prioritize_reported_recipients(reported, 0.5, draws)

    np.testing.assert_array_equal(
        result,
        np.array([True, False, True, True]),
    )


def test_prioritize_reported_recipients_requires_matching_shapes():
    """Mismatched input lengths raise ``ValueError``."""
    with pytest.raises(ValueError):
        prioritize_reported_recipients(
            np.array([True, False]),
            0.5,
            np.array([0.1]),
        )


def test_align_reported_ssi_disability_marks_under_65_reporters_disabled():
    """Only under-65 SSI reporters gain the flag; existing flags are kept."""
    result = align_reported_ssi_disability(
        is_disabled=np.array([False, False, True, False]),
        reported_ssi=np.array([True, True, False, False]),
        ages=np.array([40, 70, 30, 20]),
    )

    np.testing.assert_array_equal(
        result,
        np.array([True, False, True, False]),
    )


def test_align_reported_ssi_disability_requires_matching_shapes():
    """Mismatched input lengths raise ``ValueError``."""
    with pytest.raises(ValueError):
        align_reported_ssi_disability(
            is_disabled=np.array([False, False]),
            reported_ssi=np.array([True, False]),
            ages=np.array([40]),
        )