Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions statvar_imports/cdc/CDCWonder_NNDSS_InfectiousWeekly/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# CDCWonder_NNDSS_Infectious_Weekly

## Overview
Notifiable Infectious Diseases Data: Weekly tables from CDC WONDER which have the incident counts of different infectious diseases per week, over the previous 52 weeks, as reported by the 50 states, New York City, the District of Columbia, and the U.S. territories.

## Data Source
**Source URL:**
`https://data.cdc.gov/api/views/x9gk-5huc/rows.csv?accessType=DOWNLOAD&api_foundry=true`

## How To Download Input Data
To download and process the data, you'll need to run the provided preprocess script, `preprocess.py`. This script will automatically create an "input_files" folder where you should place the file to be processed. The script also adds a new column, 'observationDate', to the input files.

statvars: Infectious Diseases

## Download the data:
To download and preprocess the source data, run:
```python3 preprocess.py```

## Processing Instructions
To process data and generate statistical variables, use the following command from the "data" directory:

**For Test Data Run**
```
python3 tools/statvar_importer/stat_var_processor.py \
--input_data=statvar_imports/cdc/CDCWonder_NNDSS_InfectiousWeekly/testdata/NNDSS_Weekly_Data.csv \
--pv_map=statvar_imports/cdc/CDCWonder_NNDSS_InfectiousWeekly/nndss_weekly_pvmap.csv \
--config_file=statvar_imports/cdc/CDCWonder_NNDSS_InfectiousWeekly/nndss_weekly_metadata.csv \
--output_path=statvar_imports/cdc/CDCWonder_NNDSS_InfectiousWeekly/testdata/nndss_weekly_output
```

**For Main data run**
```bash
python3 tools/statvar_importer/stat_var_processor.py \
--input_data=statvar_imports/cdc/CDCWonder_NNDSS_InfectiousWeekly/input_files/NNDSS_Weekly_Data.csv \
--pv_map=statvar_imports/cdc/CDCWonder_NNDSS_InfectiousWeekly/nndss_weekly_pvmap.csv \
--config_file=statvar_imports/cdc/CDCWonder_NNDSS_InfectiousWeekly/nndss_weekly_metadata.csv \
--existing_statvar_mcf=gs://unresolved_mcf/scripts/statvar/stat_vars.mcf \
--output_path=statvar_imports/cdc/CDCWonder_NNDSS_InfectiousWeekly/output/nndss_weekly_output
```
36 changes: 36 additions & 0 deletions statvar_imports/cdc/CDCWonder_NNDSS_InfectiousWeekly/manifest.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
{
"import_specifications": [
{
"import_name": "CDCWonder_NNDSS_Infectious_Weekly",
"curator_emails": [
"support@datacommons.org"
],
"provenance_url": "https://data.cdc.gov/api/views/x9gk-5huc/rows.csv?accessType=DOWNLOAD&api_foundry=true",
"provenance_description": "Notifiable Infectious Diseases Data: Weekly tables from CDC WONDER which has the incident counts of different infectious diseases per week that are reported by the 50 states, New York City, the District of Columbia, and the U.S. territories.",
"scripts": [
"preprocess.py",
"python ../../../tools/statvar_importer/stat_var_processor.py --input_data=input_files/NNDSS_Weekly_Data.csv --pv_map='nndss_weekly_pvmap.csv' --config_file=nndss_weekly_metadata.csv --output_path=output/nndss_weekly_output"
],
"import_inputs": [
{
"template_mcf": "output/nndss_weekly_output.tmcf",
"cleaned_csv": "output/nndss_weekly_output.csv",
"node_mcf": "output/*.mcf"
}
],
"source_files": [
"input_files/NNDSS_Weekly_Data.csv"
],
"cron_schedule": "00 11 1,15 * *",
"resource_limits": {"cpu": 4, "memory": 8, "disk": 100},
"config_override": {
"invoke_import_validation": true,
"invoke_import_tool": true,
"invoke_differ_tool": true,
"skip_input_upload": false,
"skip_gcs_upload": false,
"cleanup_gcs_volume_mount": false
}
}
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
parameter,val
mapped_rows,1
mapped_columns,5
header_rows,1
#places_resolved_csv,
input_columns,8
#input_rows,1000

Large diffs are not rendered by default.

139 changes: 139 additions & 0 deletions statvar_imports/cdc/CDCWonder_NNDSS_InfectiousWeekly/preprocess.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os, sys
import pandas as pd
from absl import app, logging
from pathlib import Path
import datetime

# Make the shared util/ directory importable so download_util_script resolves.
script_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.join(script_dir, '../../../util'))
from download_util_script import download_file
# Working directory for raw and preprocessed input; created here if absent.
INPUT_DIR = os.path.join(script_dir, "input_files")
Path(INPUT_DIR).mkdir(parents=True, exist_ok=True)
# Filename the CDC endpoint saves the download under.
INPUT_FILE = os.path.join(INPUT_DIR, "rows.csv")
# Final filename expected by the downstream stat_var_processor step.
NEW_FILE = os.path.join(INPUT_DIR, "NNDSS_Weekly_Data.csv")
# CDC WONDER NNDSS weekly CSV export.
SOURCE_URL = "https://data.cdc.gov/api/views/x9gk-5huc/rows.csv?accessType=DOWNLOAD&api_foundry=true"


def _start_date_of_year(year: int) -> datetime.date:
"""
Return start date of the year using MMWR week rules.

A year's first MMWR week is the first full calendar week (Sunday-Saturday)
that contains at least four days of the new year. This means the start
date of the first MMWR week can be in the previous calendar year.
"""
jan_one = datetime.date(year, 1, 1)

# Calculate the day difference to get to the first Sunday of the year.
# The condition jan_one.isoweekday() > 3 accounts for the rule that
# Jan 1 must be in the first week if it falls on a Mon, Tue, Wed, or Thu.
diff = 7 * (jan_one.isoweekday() > 3) - jan_one.isoweekday()

return jan_one + datetime.timedelta(days=diff)

def get_mmwr_week_start_date(year, week) -> datetime.date:
    """Return the Sunday on which the given MMWR week of *year* begins."""
    # Week 1 begins at _start_date_of_year; every later week starts
    # exactly seven days after the previous one.
    week_one_start = _start_date_of_year(year)
    return week_one_start + datetime.timedelta(days=7 * (week - 1))

def preprocess_data(filepath: str):
    """
    Add an 'observationDate' column to the CSV at *filepath*, in place.

    The date is the MMWR week start date derived from the 'Current MMWR
    Year' and 'MMWR WEEK' columns. The new column is inserted immediately
    after 'MMWR WEEK' before the file is written back to the same path.

    Args:
        filepath (str): The path to the CSV file to read and update.

    Raises:
        RuntimeError: If the file is missing, a required column is absent,
            or any other error occurs while processing.
    """
    try:
        # Read the CSV file into a pandas DataFrame.
        df = pd.read_csv(filepath)

        # Both columns are needed to compute the observation date.
        required_cols = ['Current MMWR Year', 'MMWR WEEK']
        if not all(col in df.columns for col in required_cols):
            raise KeyError(f"The file must contain the columns: {required_cols}.")

        # NOTE: .apply(axis=1) is row-wise, not vectorized; it is used here
        # because the date computation needs both year and week per row.
        df['observationDate'] = df.apply(
            lambda row: get_mmwr_week_start_date(row['Current MMWR Year'], row['MMWR WEEK']),
            axis=1
        )

        # 'observationDate' was appended as the last column; move it to sit
        # immediately after 'MMWR WEEK' for a readable column order.
        cols = list(df.columns)
        observation_date_col = cols.pop()
        cols.insert(cols.index('MMWR WEEK') + 1, observation_date_col)
        df = df[cols]

        # Save the updated DataFrame back to the same CSV file.
        df.to_csv(filepath, index=False)
        logging.info(f"Success: File '{filepath}' has been updated and saved.")

    # Use logging.error, not absl's logging.fatal: fatal aborts the process
    # immediately, which made the raise statements below unreachable.
    except FileNotFoundError as e:
        logging.error(f"Error: The file '{filepath}' was not found.")
        raise RuntimeError(f"Error: The file '{filepath}' was not found.") from e
    except KeyError as e:
        logging.error(f"Error: Missing a required column. Details: {e}")
        raise RuntimeError(f"Error: Missing a required column. Details: {e}") from e
    except Exception as e:
        logging.error(f"An unexpected error occurred: {e}")
        raise RuntimeError(f"Import job failed An unexpected error occurred: {e}") from e

def main(argv):
    """Download the NNDSS weekly CSV, add 'observationDate', and rename it.

    Raises:
        RuntimeError: If the download or the rename step fails (a
            preprocessing failure propagates as RuntimeError from
            preprocess_data).
    """
    # Use logging.error, not absl's logging.fatal, throughout: fatal aborts
    # the process immediately, which made the raise statements unreachable.
    try:
        download_file(url=SOURCE_URL,
                      output_folder=INPUT_DIR,
                      unzip=False,
                      headers=None,
                      tries=3,
                      delay=5,
                      backoff=2)
    except Exception as e:
        logging.error(f"Failed to download NNDSS weekly data file,{e}")
        raise RuntimeError(f"Failed to download NNDSS weekly data file,{e}") from e

    # 1. Add the 'observationDate' column to the downloaded file in place.
    preprocess_data(INPUT_FILE)

    # 2. Rename the file to the name the downstream processor expects.
    try:
        if os.path.exists(INPUT_FILE):
            # If NNDSS_Weekly_Data.csv already exists from a previous run,
            # remove it first so os.rename succeeds on platforms where the
            # destination must not exist.
            if os.path.exists(NEW_FILE):
                os.remove(NEW_FILE)
            os.rename(INPUT_FILE, NEW_FILE)
            logging.info("Successfully renamed 'rows.csv' to 'NNDSS_Weekly_Data.csv'")
        else:
            logging.warning(f"Could not rename file. '{INPUT_FILE}' does not exist.")
    except Exception as e:
        logging.error(f"Failed to rename file: {e}")
        raise RuntimeError(f"Failed to rename file: {e}") from e

if __name__ == "__main__":
    # absl's app.run parses flags and then invokes main(argv).
    app.run(main)
Loading
Loading