diff --git a/.gitignore b/.gitignore index 8ab39ee..b53cb86 100644 --- a/.gitignore +++ b/.gitignore @@ -129,6 +129,8 @@ dmypy.json # Pyre type checker .pyre/ +# VS Code settings +.vscode # Other Notebooks @@ -137,4 +139,5 @@ pytheranostics/local pytheranostics/.vscode pytheranostics/data/s-values -pytheranostics/data/ICRP_phantom_masses \ No newline at end of file +pytheranostics/data/ICRP_phantom_masses +pytheranostics/data/voxel_kernels \ No newline at end of file diff --git a/docs/source/tutorials/Data_Ingestion_Examples/Data_Ingestion_Examples.ipynb b/docs/source/tutorials/Data_Ingestion_Examples/Data_Ingestion_Examples.ipynb index 6e691f8..ad83d00 100644 --- a/docs/source/tutorials/Data_Ingestion_Examples/Data_Ingestion_Examples.ipynb +++ b/docs/source/tutorials/Data_Ingestion_Examples/Data_Ingestion_Examples.ipynb @@ -25,7 +25,7 @@ "# Imports\n", "from pathlib import Path\n", "import pytheranostics as tx\n", - "from pytheranostics.ImagingDS.dicom_ingest import auto_setup_dosimetry_study, extract_patient_metadata\n", + "from pytheranostics.imaging_ds.dicom_ingest import auto_setup_dosimetry_study, extract_patient_metadata\n", "\n", "print('PyTheranostics version:', getattr(tx, '__version__', 'unknown'))" ] @@ -201,8 +201,14 @@ } ], "metadata": { + "kernelspec": { + "display_name": ".pyThera-DBG", + "language": "python", + "name": "python3" + }, "language_info": { - "name": "python" + "name": "python", + "version": "3.10.12" } }, "nbformat": 4, diff --git a/docs/source/tutorials/Dosimetry_Pipeline/voxel_based_dosimetry_with_lu177_dose_kernel.ipynb b/docs/source/tutorials/Dosimetry_Pipeline/voxel_based_dosimetry_with_lu177_dose_kernel.ipynb new file mode 100644 index 0000000..cbc1cc9 --- /dev/null +++ b/docs/source/tutorials/Dosimetry_Pipeline/voxel_based_dosimetry_with_lu177_dose_kernel.ipynb @@ -0,0 +1,459 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "1dc3882a", + "metadata": {}, + "source": [ + "# Project Setup and Initialization\n", + "Initialize new project. \n", + "\n", + "In this tutorial we will run a full dosimetry pipeline over the SNNMI Dosimetry Challenge Dataset.\n", + "\n", + "This dataset contains anonymized CT and SPECT DICOM images suitable for testing segmentation and dosimetry workflows. Data is sourced from the University of Michigan Deep Blue repository (DOI: 10.7302/864r-tb45)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fa35bf4d", + "metadata": {}, + "outputs": [], + "source": [ + "from pytheranostics.data_fetchers import fetch_snmmi_dosimetry_challenge\n", + "from pytheranostics.imaging_ds.dicom_ingest import auto_setup_dosimetry_study_inventory\n", + "from pytheranostics.imaging_ds import create_studies_with_masks\n", + "from pytheranostics.segmentation import totalseg_segment, convert_masks_to_rtstruct\n", + "from pytheranostics.dosimetry import build_roi_fit_config\n", + "from pytheranostics.dosimetry.voxel_s_dosimetry import VoxelSDosimetry\n", + "from pathlib import Path\n", + "import logging\n", + "root = logging.getLogger()\n", + "root.setLevel(logging.INFO)\n", + "\n", + "# Ensure a handler exists and is set to INFO\n", + "if not root.handlers:\n", + " handler = logging.StreamHandler()\n", + " handler.setLevel(logging.INFO)\n", + " formatter = logging.Formatter(\"%(levelname)s:%(name)s:%(message)s\")\n", + " handler.setFormatter(formatter)\n", + " root.addHandler(handler)\n", + "else:\n", + " for h in root.handlers:\n", + " h.setLevel(logging.INFO)\n", + "\n", + "\n", + "%reload_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "markdown", + "id": "a5bac369", + "metadata": {}, + "source": [ + "## Step 0: Initialize a New Project\n", + "\n", + "Before starting, you can use PyTheranostics' project initialization tool to set up a standardized project structure with configuration templates:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3e52ae92", + "metadata": {}, + "outputs": [], + "source": [ + "from pytheranostics import init_project\n", + "\n", + "# Create a new project with all templates and standard directories\n", + "project_base = Path(\"./snmmi_dosimetry_challenge_project\")\n", + "init_project(project_base)" + ] + }, + { + "cell_type": "markdown", + "id": "d10172d8", + "metadata": {}, + "source": [ + "## Step 1: Download Example Data\n", + "\n", + "We'll use the SNMMI Dosimetry Challenge dataset from University of Michigan Deep Blue. This contains multi-timepoint SPECT/CT data and we will focus on Patient_004." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f565a978", + "metadata": {}, + "outputs": [], + "source": [ + "fetch_snmmi_dosimetry_challenge(data_home=str(project_base))" + ] + }, + { + "cell_type": "markdown", + "id": "300d1806", + "metadata": {}, + "source": [ + "## Step 2: Segmentation using TotalSegmentator.\n", + "\n", + "Please take a look at the Total Segmentator Tutorial for more details (add link).\n", + "\n", + "### 2.1: Set up folders for where TotalSegmentator will write segmentation masks and where RT-STRUCT files will be saved." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e686b71e", + "metadata": {}, + "outputs": [], + "source": [ + "# Provide a list of ROOT folder where all CT series under it will be discovered automatically.\n", + "# In this case, as an example, we provide the path to a single CT series at the first scan of Patient_004 from the downloaded dataset.\n", + "project_data_dirs = [project_base / f\"snmmi_dose_challenge/Patient_004/SPECT_Cts/scan{i}/ct\" for i in range(1, 5)]\n", + "\n", + "# Specify paths of Where to write segmentations and RT-STRUCT outputs (will be grouped by PatientID)\n", + "totalsegmentator_output_dir = project_base / \"Segmentations\"\n", + "rtstruct_output_dir = project_base / \"rtstructs\"" + ] + }, + { + "cell_type": "markdown", + "id": "fbacd88c", + "metadata": {}, + "source": [ + "### 2.2: Run TotalSegmentator" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7fefba7a", + "metadata": {}, + "outputs": [], + "source": [ + "# Run segmentation on each time point and save results in the specified output directory \n", + "seg_results = []\n", + "\n", + "for project_data_dir in project_data_dirs:\n", + "\n", + " seg_result = totalseg_segment(\n", + " root_dir=str(project_data_dir),\n", + " base_output_dir=str(totalsegmentator_output_dir),\n", + " device=\"gpu\", # Change to \"gpu\" if using an NVIDIA GPU, or \"cpu\" to run on CPU. If GPU, set number of workers to 1 and parallel=False.\n", + " parallel=False,\n", + " max_workers=1, # Number of parallel workers to use if parallel=True\n", + " )\n", + " \n", + " seg_results.append(seg_result)" + ] + }, + { + "cell_type": "markdown", + "id": "8953fb77", + "metadata": {}, + "source": [ + "### 2.3: Select organs at risk and convert binary masks to RTStruct format.\n", + "\n", + "#### Using Configuration Files\n", + "\n", + "By default, converting all 104 structures segmented by TotalSegmentator would create a very large RT-STRUCT file. Configuration files let you:\n", + "- **Filter**: Include only the organs you need (e.g., kidneys, liver, spleen)\n", + "- **Rename**: Change organ names to match your workflow conventions\n", + "- **Combine**: Merge multiple structures into one (e.g., all ribs → \"ribs\")\n", + "\n", + "\n", + "For details on configuration files, please refer to the TotalSegmentator tutorial. For this example, we will modify the `total_seg_config.json` with the following content:\n", + "\n", + "\n", + "```json\n", + "{\n", + " \"vois\": [\n", + " {\"voi_name\": \"kidney_left\", \"include\": true, \"new_name\": \"L Kidney\"},\n", + " {\"voi_name\": \"kidney_right\", \"include\": true, \"new_name\": \"R Kidney\"},\n", + " {\"voi_name\": \"liver\", \"include\": true, \"new_name\": \"Liver\"},\n", + " {\"voi_name\": \"spleen\", \"include\": true, \"new_name\": \"Spleen\"}\n", + " ],\n", + "}\n", + "```\n", + "\n", + "This configuration creates an RT-STRUCT with:\n", + "- **L Kidney**\n", + "- **R Kidney**\n", + "- **Liver**\n", + "- **Spleen**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bea8909b", + "metadata": {}, + "outputs": [], + "source": [ + "# Convert segmentation masks to RT-STRUCT format for use in dosimetry calculations\n", + "rtstruct_results = []\n", + "for seg_result in seg_results:\n", + " rtstruct_result = convert_masks_to_rtstruct(\n", + " segmentation_base_dir=str(totalsegmentator_output_dir),\n", + " ct_series_paths=seg_result[\"ct_paths\"],\n", + " rtstruct_output_dir=str(rtstruct_output_dir ),\n", + " config_path=project_base / \"total_seg_config.json\", # Config file specifying which structures to include\n", + " )\n", + " rtstruct_results.append(rtstruct_result)\n", + " " + ] + }, + { + "cell_type": "markdown", + "id": "caeef41a", + "metadata": {}, + "source": [ + "## Step 3: Auto-organize data \n", + "\n", + "See Data_Ingestion_Examples tutorial for details" + ] + }, + { + "cell_type": "markdown", + "id": "ec852c66", + "metadata": {}, + "source": [ + "### 3.0: Move freshly generated rtstruct files into the imaging data folder\n", + "\n", + " In this tutorial, SPECT and CT data lies under `project_folder/snmmi_dose_challenge/Patient_004`. The RTStruct files generated in the previous step are stored under `project_folder/rtstructs`. Please move the RTstruct files under the patient folder so that the auto-setup inventory works.\n", + "\n", + "### 3.1: Run Auto-setup / study inventory" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2d6c9b2f", + "metadata": {}, + "outputs": [], + "source": [ + "# Organize and extract metadata\n", + "study_info, ct_paths, spect_paths, rtstruct_files = auto_setup_dosimetry_study_inventory(\n", + " base_dir=project_base, # / \"snmmi_dose_challenge/Patient_004\", # Base directory to search for CT and SPECT series\n", + " patient_id=None, # auto-detect from DICOM headers\n", + ")\n", + "\n", + "# Quick summary\n", + "print('Patient ID:', study_info.get('patient_id'))\n", + "inj = study_info.get('injection_info', {})\n", + "print('Injection date:', inj.get('injection_date'), 'time:', inj.get('injection_time'))\n", + "print(f'CT time points: {len(ct_paths)}')\n", + "print(f'SPECT time points: {len(spect_paths)}')\n", + "print(f'RTSTRUCT files: {len(rtstruct_files)}')\n", + "\n", + "# Example: show first CT/SPECT time point folders (if present)\n", + "print('First CT tp:', ct_paths[0] if ct_paths else None)\n", + "print('First SPECT tp:', spect_paths[0] if spect_paths else None)\n", + "print('First RTSTRUCT:', rtstruct_files[0] if rtstruct_files else None)" + ] + }, + { + "cell_type": "markdown", + "id": "6afd0d68", + "metadata": {}, + "source": [ + "## Step 3: Set-up VOI Mapping and Fit/Dosimetry Parameters.\n", + "\n", + "Please visit ROI_Mapping_Tutorial for details (add link)\n", + "\n", + "### 3.1: Modify the `dosimetry_fit_defaults.json` to specify default fit parameters.\n", + "\n", + "```JSON\n", + "{\n", + " \"_description\": \"Apply bi-exponential fitting with uptake phase to all organs by default. Users can override these defaults for specific organs or ROIs by providing a custom configuration file.\",\n", + " \"organ_defaults\": {\n", + " \"fit_order\": 2,\n", + " \"with_uptake\": true,\n", + " \"param_init\": {\"A1\": 100, \"A2\": 0.01},\n", + " \"bounds\": {\"A2\": [0.004345, 1.0]},\n", + " \"fixed_parameters\": null,\n", + " \"bounds\": null,\n", + " \"washout_ratio\": null\n", + " }\n", + "}\n", + "```\n", + "\n", + "\n", + "### 3.2: Modify VOI Mapping to map names of RTStruct ROIs to standard names in pyTheranostics. Modify the key `ct_mappings` and `spect_mappings` inside `voi_mappings_config.json` to map the RTstruct names stored in segmented regions to pyTheranostics standardized names:\n", + "\n", + "```JSON\n", + "\"ct_mappings\": {\n", + " \"LKidney\": \"Kidney_Left\",\n", + " \"RKidney\": \"Kidney_Right\",\n", + " \"Liver\": \"Liver\",\n", + " \"Spleen\": \"Spleen\"\n", + " },\n", + "\n", + " \"spect_mappings\": {\n", + " \"LKidney\": \"Kidney_Left\",\n", + " \"RKidney\": \"Kidney_Right\",\n", + " \"Liver\": \"Liver\",\n", + " \"Spleen\": \"Spleen\"\n", + " }\n", + "\n", + " ```" + ] + }, + { + "cell_type": "markdown", + "id": "4471e804", + "metadata": {}, + "source": [ + "### 3.3: Initialize Dosimetry configuration.\n", + "\n", + "In this step, we build the main configuration dictionary. It is composed of:\n", + "- ROIs fitting properties (obtained from the default `dosimetry_fit_defaults.json` or specified by the user)\n", + "- Patient ID and Injection information, that can be obtained from the `study_info` dictionary or specified by the user\n", + "- Dosimetry Level (Voxel) and method (Voxel-S)\n", + "- Since we are running voxel-S convolution method, we can specify if we scale voxel dose by density.\n", + "- Reference time point to calculate time-integrated activity at the voxel level." + ] + }, + { + "cell_type": "markdown", + "id": "c6ba09d8", + "metadata": {}, + "source": [ + "### 3.3: Load the Data and initialize configuration" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dd810f6a", + "metadata": {}, + "outputs": [], + "source": [ + "# Load data AND apply mappings in one step\n", + "longCT, longSPECT, inj, used_mappings = create_studies_with_masks(\n", + " patient_id=study_info[\"patient_id\"],\n", + " cycle_no=1,\n", + " parallel=True,\n", + " mapping_config=project_base / \"voi_mappings_config.json\",\n", + " study_info=study_info\n", + " )\n", + "\n", + "# Initilization of roi configuration \n", + "roi_config = build_roi_fit_config(longSPECT=longSPECT, config_path=project_base / \"dosimetry_fit_defaults.json\")\n", + "\n", + "# Build Dosimetry config\n", + "dosimetry_config = {\n", + " \"PatientID\": study_info[\"patient_id\"],\n", + " \"DatabaseDir\": str(project_base / \"dosimetry_database\"),\n", + " \n", + " \"VOIs\": roi_config,\n", + " \n", + " \"InjectionDate\": study_info[\"time_points\"][0][\"injection_info\"][\"injection_date\"],\n", + " \"InjectionTime\": study_info[\"time_points\"][0][\"injection_info\"][\"injection_time\"],\n", + " \"InjectedActivity\": longSPECT.meta[0].Injected_Activity_MBq,\n", + " \"Radionuclide\": longSPECT.meta[0].Radionuclide,\n", + " \"PatientWeight_g\": study_info[\"time_points\"][0][\"injection_info\"][\"patient_weight_g\"],\n", + " \n", + " \n", + " \"Level\": \"Voxel\",\n", + " \"Method\": \"Voxel-S-value\",\n", + " \"ScaleDoseByDensity\": False,\n", + " \n", + " \"ReferenceTimePoint\": 0\n", + "}\n" + ] + }, + { + "cell_type": "markdown", + "id": "827252b6", + "metadata": {}, + "source": [ + "### 3.4: Initialize Dosimetry Calculator" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e87cc410", + "metadata": {}, + "outputs": [], + "source": [ + "Dosimetry = VoxelSDosimetry(config=dosimetry_config, nm_data=longSPECT, ct_data=longCT)" + ] + }, + { + "cell_type": "markdown", + "id": "84663d53", + "metadata": {}, + "source": [ + "### 3.5 Compute Dose" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d1fc71a4", + "metadata": {}, + "outputs": [], + "source": [ + "Dosimetry.compute_dose()" + ] + }, + { + "cell_type": "markdown", + "id": "4a2607f1", + "metadata": {}, + "source": [ + "### 3.6 Review Results\n", + "\n", + "Time-activity data, time-integrated activity (residence times) at the region level at stored under `Dosimetry.results`. Mean dose per region (derived from Voxelized dose map) is summaried in `Dosimetry.df_ad`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7788cdca", + "metadata": {}, + "outputs": [], + "source": [ + "# View Time-Activity data, parameters of fit, etc.\n", + "Dosimetry.results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "251b81a6", + "metadata": {}, + "outputs": [], + "source": [ + "# View Average Dose\n", + "Dosimetry.df_ad" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".pyThera-DBG", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/source/tutorials/segmentation/total_segmentator_tutorial.ipynb b/docs/source/tutorials/segmentation/total_segmentator_tutorial.ipynb index 71e46e2..9d4b055 100644 --- a/docs/source/tutorials/segmentation/total_segmentator_tutorial.ipynb +++ b/docs/source/tutorials/segmentation/total_segmentator_tutorial.ipynb @@ -212,7 +212,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "fce08eb7", "metadata": {}, "outputs": [ @@ -313,7 +313,7 @@ "seg_result = totalseg_segment(\n", " root_dir=str(project_data_dir),\n", " base_output_dir=str(totalsegmentator_output_dir),\n", - " device=\"mps\", # Change to \"cuda\" if using an NVIDIA GPU, or \"cpu\" to run on CPU\n", + " device=\"mps\", # Change to \"gpu\" if using an NVIDIA GPU, or \"cpu\" to run on CPU\n", " parallel=True,\n", " max_workers=4, # Number of parallel workers to use if parallel=True\n", ")" @@ -515,7 +515,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "d5192b60", "metadata": {}, "outputs": [ diff --git a/pyproject.toml b/pyproject.toml index ec73121..52fc43f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -163,6 +163,7 @@ profile = "black" markers = [ "smoke: marks tests as smoke tests (quick functionality checks)", "slow: marks tests as slow (heavy computation or large data)", + "integration: marks tests as integration tests spanning multiple subsystems", ] filterwarnings = [ "ignore:.*SwigPy.*:DeprecationWarning", diff --git a/pytheranostics/dosimetry/base_dosimetry.py b/pytheranostics/dosimetry/base_dosimetry.py index 8af3cd5..36e33cc 100644 --- a/pytheranostics/dosimetry/base_dosimetry.py +++ b/pytheranostics/dosimetry/base_dosimetry.py @@ -87,9 +87,14 @@ def __init__( self.toMBq = 1e-6 # Factor to scale activity from Bq to MBq # Store data - self.patient_id = config["PatientID"] - self.cycle = config["Cycle"] - self.db_dir = Path(config["DatabaseDir"]) + self.patient_id = ( + config["PatientID"] if "PatientID" in config else "UnknownPatient" + ) + self.cycle = config["Cycle"] if "Cycle" in config else 1 + self.db_dir = ( + Path(config["DatabaseDir"]) if "DatabaseDir" in config else Path("./") + ) + self.check_mandatory_fields() self.check_patient_in_db() # TODO: Traceability/database? @@ -231,6 +236,20 @@ def check_mandatory_fields(self) -> None: print("No Reference Time point was given. Assigning time ID = 0") self.config["ReferenceTimePoint"] = 0 + # If WholeBody and RemainderOfBody were not defined by the user, add them by default to the VOIs to ensure consistency with dosimetry calculations. + for missing in ["WholeBody", "RemainderOfBody"]: + if missing not in self.config["VOIs"]: + print( + f"Adding {missing} to the list of VOIs with default parameters. This region is required for dosimetry calculations." + ) + self.config["VOIs"][missing] = { + "fit_order": None, + "with_uptake": None, + "fixed_parameters": None, + "bounds": None, + "param_init": None, + } + if "Organ" in self.config["Level"]: if "WholeBody" not in self.config["VOIs"]: if "No" in self.config["OrganLevel"]["AdditionalOptions"]["WholeBody"]: @@ -255,6 +274,7 @@ def initialize(self) -> pandas.DataFrame: roi_name: [] for roi_name in self.nm_data.masks[0].keys() if roi_name in self.config["VOIs"] + or roi_name in ["WholeBody", "RemainderOfBody"] } cols: List[str] = ["Time_hr", "Volume_CT_mL", "Activity_MBq", "Density_HU"] @@ -411,6 +431,12 @@ def compute_tia(self) -> None: } for region, region_data in self.results.iterrows(): + + if not isinstance(region, str): + raise TypeError( + f"Region names should be strings. Found {type(region)} instead." + ) + fit_results = self.smart_fit_selection( region_data=region_data, region=region ) @@ -418,7 +444,7 @@ def compute_tia(self) -> None: plot_tac_residuals( result=fit_results, region=region, - cycle=self.config["Cycle"], + cycle=self.cycle, output_dir=self.db_dir, ) diff --git a/pytheranostics/dosimetry/config.py b/pytheranostics/dosimetry/config.py index 8902658..030c9e3 100644 --- a/pytheranostics/dosimetry/config.py +++ b/pytheranostics/dosimetry/config.py @@ -159,6 +159,12 @@ def build_roi_fit_config( for tp_masks in getattr(longSPECT, "masks", {}).values(): all_masks.update(tp_masks.keys()) + # Add WholeBody and RemainderOfBody if they are not available in masks, as they will be needed for dosimetry (and will be generated by default if not present) + if "WholeBody" not in all_masks: + all_masks.add("WholeBody") + if "RemainderOfBody" not in all_masks: + all_masks.add("RemainderOfBody") + # Get valid organ names from config try: from pytheranostics.imaging_ds.longitudinal_study import LongitudinalStudy diff --git a/pytheranostics/dosimetry/dvk.py b/pytheranostics/dosimetry/dvk.py index 96822f8..d7bb4ef 100644 --- a/pytheranostics/dosimetry/dvk.py +++ b/pytheranostics/dosimetry/dvk.py @@ -1,43 +1,565 @@ """Dose voxel kernel module for convolution-based dosimetry.""" -from typing import Optional +from __future__ import annotations + +import csv +import logging +import re +import shutil +import tempfile +import urllib.error +import urllib.request +import zipfile +from dataclasses import dataclass +from pathlib import Path +from typing import List, Optional, Sequence, Tuple import numpy from scipy import signal from pytheranostics.misc_tools.tools import hu_to_rho -from pytheranostics.shared.resources import resource_path + +logger = logging.getLogger(__name__) + +_DATA_DIR = Path(__file__).resolve().parent.parent / "data" / "voxel_kernels" +_ZENODO_RECORD_ID = "7596345" +_DEFAULT_CROP_SIZE_MM = 25.0 +_CSV_PATTERN = re.compile( + r"^(?P[0-9]+[A-Za-z]+)" + r"_(?P[^_]+)" + r"_(?P[^_]+)" + r"_(?P[0-9]+(?:\.[0-9]+)?)mm" + r"_(?P\d+)x(?P=matrix_size)x(?P=matrix_size)\.csv$", + flags=re.IGNORECASE, +) + + +@dataclass(frozen=True) +class KernelMetadata: + """Metadata parsed from a dose-kernel CSV filename. + + Attributes + ---------- + path : Path + Path to the CSV file on disk. + isotope : str + Canonical isotope label, for example ``"Lu177"``. + voxel_size_mm : float + Isotropic voxel size represented by the kernel, in millimeters. + matrix_size : int + Edge length of the cubic full kernel matrix. + """ + + path: Path + isotope: str + voxel_size_mm: float + matrix_size: int + + +def _split_isotope(isotope: str) -> Tuple[str, str]: + """Split an isotope label into mass number and element symbol. + + Parameters + ---------- + isotope : str + Isotope label formatted as ``"Lu177"`` or ``"177Lu"``. + + Returns + ------- + tuple of str + Mass number followed by the element symbol. + + Raises + ------ + ValueError + If ``isotope`` does not match one of the supported label formats. + """ + match = re.fullmatch(r"(?P[A-Za-z]+)(?P\d+)", isotope) + if match is None: + match = re.fullmatch(r"(?P\d+)(?P[A-Za-z]+)", isotope) + + if match is None: + raise ValueError( + "Isotope must be formatted like 'Lu177' or '177Lu'. " + f"Received '{isotope}'." + ) + + return match.group("mass"), match.group("symbol") + + +def _canonical_isotope(isotope: str) -> str: + """Convert an isotope label to the canonical comparison format. + + Parameters + ---------- + isotope : str + Isotope label formatted as ``"Lu177"`` or ``"177Lu"``. + + Returns + ------- + str + Canonical label with the element symbol first, for example ``"Lu177"``. + """ + mass, symbol = _split_isotope(isotope) + return f"{symbol.capitalize()}{mass}" + + +def _zenodo_isotope_name(isotope: str) -> str: + """Convert an isotope label to Zenodo's archive naming convention. + + Parameters + ---------- + isotope : str + Isotope label formatted as ``"Lu177"`` or ``"177Lu"``. + + Returns + ------- + str + Archive label with the mass number first, for example ``"177Lu"``. + """ + mass, symbol = _split_isotope(isotope) + return f"{mass}{symbol.capitalize()}" + + +def _discover_kernel_files(kernel_dir: Path, isotope: str) -> List[KernelMetadata]: + """Discover local kernel CSV files matching an isotope. + + Parameters + ---------- + kernel_dir : Path + Root directory searched recursively for kernel CSV files. + isotope : str + Requested isotope label. + + Returns + ------- + list of KernelMetadata + Parsed metadata for matching kernel files. + """ + requested_isotope = _canonical_isotope(isotope) + kernels: List[KernelMetadata] = [] + + for path in sorted(kernel_dir.rglob("*.csv")): + match = _CSV_PATTERN.match(path.name) + if match is None: + continue + + parsed_isotope = _canonical_isotope(match.group("isotope")) + if parsed_isotope != requested_isotope: + continue + + kernels.append( + KernelMetadata( + path=path, + isotope=parsed_isotope, + voxel_size_mm=float(match.group("voxel_size")), + matrix_size=int(match.group("matrix_size")), + ) + ) + + return kernels + + +def _select_closest_kernel( + requested_voxel_size_mm: float, available_kernels: Sequence[KernelMetadata] +) -> KernelMetadata: + """Select the available kernel whose voxel size is closest to the request. + + Parameters + ---------- + requested_voxel_size_mm : float + Requested isotropic voxel size in millimeters. + available_kernels : sequence of KernelMetadata + Candidate kernels for the isotope of interest. + + Returns + ------- + KernelMetadata + Metadata for the closest matching kernel. + + Raises + ------ + ValueError + If ``available_kernels`` is empty. + """ + if not available_kernels: + raise ValueError("No kernels were provided for voxel-size selection.") + + closest_kernel = min( + available_kernels, + key=lambda kernel: abs(kernel.voxel_size_mm - requested_voxel_size_mm), + ) + delta_mm = abs(closest_kernel.voxel_size_mm - requested_voxel_size_mm) + + if delta_mm > 0.09: + logger.warning( + "Requested voxel size %.3f mm does not closely match an available " + "kernel. Using %.4f mm instead.", + requested_voxel_size_mm, + closest_kernel.voxel_size_mm, + ) + + return closest_kernel + + +def _download_kernels_from_zenodo(isotope: str, kernel_dir: Path) -> None: + """Download and extract dose-kernel CSV files for an isotope. + + Parameters + ---------- + isotope : str + Requested isotope label. + kernel_dir : Path + Destination directory where the downloaded archive is extracted. + + Raises + ------ + NotImplementedError + If no Zenodo archive exists for the requested isotope. + FileNotFoundError + If the downloaded archive does not contain any CSV kernel files. + urllib.error.URLError + If the archive download fails for a network-related reason. + """ + zenodo_name = _zenodo_isotope_name(isotope) + url = ( + f"https://zenodo.org/records/{_ZENODO_RECORD_ID}/files/" + f"{zenodo_name}.zip?download=1" + ) + + kernel_dir.mkdir(parents=True, exist_ok=True) + + try: + with urllib.request.urlopen(url, timeout=120) as response: + with tempfile.NamedTemporaryFile(suffix=".zip", delete=False) as tmp_file: + shutil.copyfileobj(response, tmp_file) + archive_path = Path(tmp_file.name) + except urllib.error.HTTPError as exc: + if exc.code == 404: + logger.error( + "No dose voxel kernel archive is available on Zenodo for isotope %s.", + isotope, + ) + raise NotImplementedError( + f"Dose voxel kernels for isotope '{isotope}' are not available on Zenodo." + ) from exc + raise + + try: + with zipfile.ZipFile(archive_path) as archive: + csv_members = [ + member + for member in archive.infolist() + if member.filename.lower().endswith(".csv") + ] + if not csv_members: + raise FileNotFoundError( + f"Downloaded archive for isotope '{isotope}' does not contain CSV kernels." + ) + + for member in csv_members: + archive.extract(member, path=kernel_dir) + finally: + archive_path.unlink(missing_ok=True) + + logger.info("Downloaded dose voxel kernels for %s from %s", isotope, url) + + +def _ensure_kernel_files_available( + kernel_dir: Path, isotope: str +) -> List[KernelMetadata]: + """Ensure that kernel CSV files for an isotope are available locally. + + Parameters + ---------- + kernel_dir : Path + Root directory containing local kernel files. + isotope : str + Requested isotope label. + + Returns + ------- + list of KernelMetadata + Available kernels for the requested isotope after local discovery and any + required download. + + Raises + ------ + NotImplementedError + If matching kernels are still unavailable after the download attempt. + """ + kernels = _discover_kernel_files(kernel_dir=kernel_dir, isotope=isotope) + if kernels: + return kernels + + has_any_csv_kernels = any( + _CSV_PATTERN.match(path.name) is not None for path in kernel_dir.rglob("*.csv") + ) + if not has_any_csv_kernels: + logger.info( + "No kernel CSV files found under %s. Downloading %s kernels.", + kernel_dir, + isotope, + ) + else: + logger.info( + "No local kernel CSVs found for %s under %s. Downloading isotope archive.", + isotope, + kernel_dir, + ) + + _download_kernels_from_zenodo(isotope, kernel_dir) + kernels = _discover_kernel_files(kernel_dir=kernel_dir, isotope=isotope) + if not kernels: + raise NotImplementedError( + f"Dose voxel kernels for isotope '{isotope}' were not found after download." + ) + + return kernels + + +def _load_octant_kernel(csv_path: Path) -> numpy.ndarray: + """Load a stored positive octant from a kernel CSV file. + + Parameters + ---------- + csv_path : Path + Path to a kernel CSV file using the stacked-square Zenodo layout. + + Returns + ------- + numpy.ndarray + Three-dimensional positive octant of the kernel. + + Raises + ------ + ValueError + If the CSV file is empty, malformed, or cannot be reshaped into stacked + square sections. + """ + with csv_path.open("r", encoding="utf-8", newline="") as file_obj: + rows = [ + row for row in csv.reader(file_obj) if any(cell.strip() for cell in row) + ] + + if not rows: + raise ValueError(f"Kernel CSV '{csv_path}' is empty.") + + section_size = len(rows[0]) + if section_size == 0: + raise ValueError(f"Kernel CSV '{csv_path}' does not contain numeric data.") + + if any(len(row) != section_size for row in rows): + raise ValueError(f"Kernel CSV '{csv_path}' has inconsistent row lengths.") + + if len(rows) % section_size != 0: + raise ValueError( + f"Kernel CSV '{csv_path}' cannot be reshaped into stacked square sections." + ) + + num_sections = len(rows) // section_size + octant = numpy.empty( + (section_size, section_size, num_sections), dtype=numpy.float64 + ) + + for section_idx in range(num_sections): + start = section_idx * section_size + stop = start + section_size + block = numpy.asarray(rows[start:stop], dtype=numpy.float64) + octant[:, :, section_idx] = block.T + + return octant + + +def _mirror_axis_from_center(half_kernel: numpy.ndarray, axis: int) -> numpy.ndarray: + """Mirror a kernel axis around the center voxel. + + Parameters + ---------- + half_kernel : numpy.ndarray + Kernel array where index 0 along ``axis`` corresponds to the center voxel. + axis : int + Axis to mirror. + + Returns + ------- + numpy.ndarray + Kernel with the requested axis mirrored onto the negative side. + """ + slicer = [slice(None)] * half_kernel.ndim + slicer[axis] = slice(1, None) + positive_side = half_kernel[tuple(slicer)] + mirrored_side = numpy.flip(positive_side, axis=axis) + return numpy.concatenate((mirrored_side, half_kernel), axis=axis) + + +def _build_full_kernel_from_octant(octant: numpy.ndarray) -> numpy.ndarray: + """Expand a stored positive octant into a full three-dimensional kernel. + + Parameters + ---------- + octant : numpy.ndarray + Positive octant whose index 0 corresponds to the kernel center along each + axis. + + Returns + ------- + numpy.ndarray + Full symmetric three-dimensional kernel. + """ + kernel = octant + for axis in range(3): + kernel = _mirror_axis_from_center(kernel, axis=axis) + return kernel + + +def _odd_voxel_count_for_physical_size(size_mm: float, voxel_size_mm: float) -> int: + """Return the nearest odd voxel count for a requested physical size. + + Parameters + ---------- + size_mm : float + Requested physical size in millimeters. + voxel_size_mm : float + Isotropic voxel size in millimeters. + + Returns + ------- + int + Odd voxel count closest to the requested physical extent. + + Raises + ------ + ValueError + If ``size_mm`` is not strictly positive. + """ + if size_mm <= 0: + raise ValueError("Crop size must be positive when cropping is enabled.") + + target_voxels = max(1.0, size_mm / voxel_size_mm) + rounded_voxels = max(1, int(round(target_voxels))) + if rounded_voxels % 2 == 1: + return rounded_voxels + + lower = max(1, rounded_voxels - 1) + upper = rounded_voxels + 1 + if abs(lower - target_voxels) <= abs(upper - target_voxels): + return lower + return upper + + +def _crop_kernel_around_center( + kernel: numpy.ndarray, voxel_size_mm: float, crop_size_mm: Optional[float] +) -> numpy.ndarray: + """Crop a kernel symmetrically around its center voxel. + + Parameters + ---------- + kernel : numpy.ndarray + Full three-dimensional dose kernel. + voxel_size_mm : float + Isotropic voxel size represented by ``kernel``. + crop_size_mm : float, optional + Requested physical crop size in millimeters. If ``None``, no cropping is + applied. + + Returns + ------- + numpy.ndarray + Cropped kernel, or the original kernel when no crop is needed. + """ + if crop_size_mm is None: + return kernel + + target_voxels = _odd_voxel_count_for_physical_size(crop_size_mm, voxel_size_mm) + if target_voxels >= kernel.shape[0]: + return kernel + + center = kernel.shape[0] // 2 + half_width = target_voxels // 2 + start = center - half_width + stop = center + half_width + 1 + return kernel[start:stop, start:stop, start:stop] class DoseVoxelKernel: - """Dose Voxel Kernel for convolution-based dosimetry calculations.""" + """Dose voxel kernel used for convolution-based dosimetry calculations. + + Parameters + ---------- + isotope : str + Requested isotope label. + voxel_size_mm : float + Requested isotropic voxel size in millimeters. + crop_kernel_size_mm : float, optional + Physical crop size of the cubic kernel in millimeters. Pass ``None`` to + keep the full kernel. + + Attributes + ---------- + kernel : numpy.ndarray + Loaded full or cropped dose kernel. + voxel_size_mm : float + Voxel size of the selected kernel in millimeters. + matrix_size : int + Matrix size of the selected full kernel before optional cropping. + isotope : str + Canonical isotope label of the selected kernel. + """ + + def __init__( + self, + isotope: str, + voxel_size_mm: float, + crop_kernel_size_mm: Optional[float] = _DEFAULT_CROP_SIZE_MM, + ) -> None: + """Initialize a dose voxel kernel from local or downloaded CSV files. - def __init__(self, isotope: str, voxel_size_mm: float) -> None: - """Initialize the DoseVoxelKernel. + Parameters + ---------- + isotope : str + Requested isotope label, for example ``"Lu177"``. + voxel_size_mm : float + Requested isotropic voxel size in millimeters. + crop_kernel_size_mm : float, optional + Physical cubic crop size in millimeters. Pass ``None`` to disable + cropping. - Args - ---- - isotope (str): The isotope name (e.g., 'Lu177'). - voxel_size_mm (float): Voxel size in millimeters. + Raises + ------ + ValueError + If the reconstructed kernel shape does not match the filename metadata. + NotImplementedError + If no kernel files can be found or downloaded for ``isotope``. """ - kernel_filename = ( - f"voxel_kernels/{isotope}-{voxel_size_mm:1.2f}-mm-mGyperMBqs-SoftICRP.img" + available_kernels = _ensure_kernel_files_available(_DATA_DIR, isotope) + selected_kernel = _select_closest_kernel( + requested_voxel_size_mm=voxel_size_mm, + available_kernels=available_kernels, ) - try: - with resource_path("pytheranostics.data", kernel_filename) as kernel_path: - self.kernel = numpy.fromfile(kernel_path, dtype=numpy.float32) - except FileNotFoundError: - print( - f" >> Voxel Kernel for SPECT voxel size ({voxel_size_mm:2.2f} mm) not found. Using default kernel for 4.8 mm voxels..." - ) - fallback_filename = ( - f"voxel_kernels/{isotope}-4.80-mm-mGyperMBqs-SoftICRP.img" + octant = _load_octant_kernel(selected_kernel.path) + full_kernel = _build_full_kernel_from_octant(octant) + + expected_shape = ( + selected_kernel.matrix_size, + selected_kernel.matrix_size, + selected_kernel.matrix_size, + ) + if full_kernel.shape != expected_shape: + raise ValueError( + "Kernel shape reconstructed from CSV does not match the filename " + f"metadata for '{selected_kernel.path.name}'." ) - with resource_path("pytheranostics.data", fallback_filename) as kernel_path: - self.kernel = numpy.fromfile(kernel_path, dtype=numpy.float32) - self.kernel = self.kernel.reshape((51, 51, 51)).astype(numpy.float64) + self.kernel = _crop_kernel_around_center( + kernel=full_kernel, + voxel_size_mm=selected_kernel.voxel_size_mm, + crop_size_mm=crop_kernel_size_mm, + ).astype(numpy.float64) + self.voxel_size_mm = float(selected_kernel.voxel_size_mm) + self.matrix_size = int(selected_kernel.matrix_size) + self.isotope = selected_kernel.isotope def tia_to_dose( self, tia_mbq_s: numpy.ndarray, ct: Optional[numpy.ndarray] = None @@ -59,10 +581,10 @@ def tia_to_dose( dose_mGy = signal.fftconvolve(tia_mbq_s, self.kernel, mode="same", axes=None) if ct is not None: - # TODO: Handle erroneous scale-up of dose outside of body. - print( - "Warning -> Scaling dose by density will yield erroneous dose values in very low density voxels (e.g., air inside the body)." - " Please use at your own risk" + logger.warning( + "Applying local mass-density reweighting to a homogeneous-medium dose " + "kernel result. This is not a full heterogeneity correction and is " + "only a rough approximation in soft tissue." ) dose_mGy = self.weight_dose_by_density(dose_map=dose_mGy, ct=ct) @@ -71,20 +593,54 @@ def tia_to_dose( def weight_dose_by_density( self, dose_map: numpy.ndarray, ct: numpy.ndarray ) -> numpy.ndarray: - """Scale dose per voxel by voxel density. + """Apply local mass-density reweighting to a dose map. + + This method does not perform a true heterogeneity correction. It assumes the + dose kernel was computed in a homogeneous medium close to soft tissue and + rescales the resulting dose voxel-by-voxel by ``1 / rho`` using CT-derived + density. As a result, it should be treated only as a rough local mass + correction in near-water soft tissues. - This is only valid for voxels of density similar to that of soft tissue and will also improve results for voxels - with higher density of soft tissue in some instances. However, it will over-estimate doses in voxels with lower density than soft tissue. - To prevent dose to shoot-up in areas of air where there is activity present (e.g., in the patient's gut), we do not apply scaling based on density in those voxels (i.e., we apply a factor of 1, which is equivalent to saying - the tissue is ~ soft tissue). + Methodological limitations: + - It uses only the local density of the target voxel and ignores transport + effects from surrounding tissues. + - It does not account for material composition differences beyond the + HU-to-density mapping. + - Negative HU values are clipped to 0 HU before conversion so air and other + low-density voxels are effectively treated as water-equivalent for this + scaling step. This avoids unphysical dose blow-up but is not physically + rigorous. - Args: - dose_map (numpy.ndarray): Dose-map obtained from convolution of TIA map and Dose Kernel. - ct (numpy.ndarray): CT image, in HU. + Parameters + ---------- + dose_map : numpy.ndarray + Dose map obtained from convolution of the TIA map with a homogeneous- + medium dose kernel. + ct : numpy.ndarray + CT image in HU, sampled on the same grid as ``dose_map``. Returns ------- numpy.ndarray - Modified Dose-map with dose per voxel scaled-up by density. + Dose map after local density reweighting. + + Raises + ------ + ValueError + If ``dose_map`` and ``ct`` do not share the same shape. """ - return 1 / hu_to_rho(hu=numpy.clip(ct, 0, 99999)) * dose_map + if dose_map.shape != ct.shape: + raise ValueError( + "Dose-map and CT array must have the same shape for density weighting. " + f"Got {dose_map.shape} and {ct.shape}." + ) + + ct_clipped = numpy.clip(ct.astype(numpy.float64), 0.0, None) + rho = hu_to_rho(hu=ct_clipped) + + logger.info( + "Applying local density reweighting with HU clipped to [0, inf) before " + "HU-to-density conversion." + ) + + return dose_map / rho diff --git a/pytheranostics/dosimetry/voxel_s_dosimetry.py b/pytheranostics/dosimetry/voxel_s_dosimetry.py index 3dd719e..27fb2e6 100644 --- a/pytheranostics/dosimetry/voxel_s_dosimetry.py +++ b/pytheranostics/dosimetry/voxel_s_dosimetry.py @@ -1,8 +1,9 @@ """Module for voxel-level dosimetry calculations.""" +import logging import os import shutil -from typing import Any, Dict, Optional +from typing import Any, Dict, Optional, Tuple import numpy import pandas @@ -16,12 +17,14 @@ from pytheranostics.imaging_tools.tools import itk_image_from_array, resample_to_target from pytheranostics.shared.resources import resource_path +logger = logging.getLogger(__name__) + class VoxelSDosimetry(BaseDosimetry): - """Voxel S Dosimetry class. + """Voxel S-value dosimetry workflow. - Computes parameters of fit for time activity curves at the region (organ/lesion) level, - and apply them at the voxel level for voxels belonging to user-defined regions. + Computes time-activity fits at the region level and propagates the resulting + kinetics to voxels belonging to user-defined regions. """ def __init__( @@ -71,6 +74,12 @@ def compute_voxel_tia(self) -> None: for region, region_data in self.results.iterrows(): + # Type check: + if not isinstance(region, str): + raise TypeError( + f"Region names should be strings. Found {type(region)} instead." + ) + if region == "WholeBody": continue # We do not want to double count voxels! @@ -90,7 +99,7 @@ def compute_voxel_tia(self) -> None: region_tia = region_data["TIA_MBq_h"] region_fit_params = region_data["Fit_params"] # fit params - exp_order = self.config["rois"][region]["fit_order"] + exp_order = self.config["VOIs"][region]["fit_order"] region_fit, _, _ = get_exponential( order=exp_order, param_init=None, decayconst=1.0 ) # Decay-constant not used here. @@ -119,22 +128,55 @@ def compute_voxel_tia(self) -> None: return None def apply_voxel_s(self) -> None: - """Apply convolution over TIA map.""" + """Convolve the voxel TIA map with the selected dose kernel. + + Resamples the TIA map to the kernel grid, optionally applies local + density-based dose scaling using CT, and stores the resulting dose map on + the configured NM or CT output grid. + """ ref_time_id = self.config["ReferenceTimePoint"] - nm_voxel_mm = self.nm_data.images[ref_time_id].GetSpacing()[0] + output_grid = str(self.config.get("VoxelSOutputGrid", "NM")).upper() + logger.info("Applying voxel-S dosimetry using %s output grid.", output_grid) + output_ref_image, output_mask = self._resolve_output_grid_reference( + ref_time_id=ref_time_id, output_grid=output_grid + ) + tia_ref_image = self.tia_map.images[0] + tia_voxel_mm = self._scalar_voxel_size_mm(tia_ref_image, image_name="TIA map") + logger.info("Selecting dose kernel from TIA map spacing %.3f mm.", tia_voxel_mm) + + if self.nm_data.meta[0].Radionuclide is None: + raise ValueError( + "Radionuclide information is required in nm_data meta to apply voxel S-value convolution." + ) dose_kernel = DoseVoxelKernel( - isotope=self.nm_data.meta[0].Radionuclide, voxel_size_mm=nm_voxel_mm + isotope=self.nm_data.meta[0].Radionuclide, voxel_size_mm=tia_voxel_mm ) + logger.info( + "Using %s kernel with voxel size %.3f mm and matrix size %d.", + dose_kernel.isotope, + dose_kernel.voxel_size_mm, + dose_kernel.matrix_size, + ) + kernel_ref_image = self._build_reference_image_with_spacing( + ref_image=tia_ref_image, + spacing_mm=( + dose_kernel.voxel_size_mm, + dose_kernel.voxel_size_mm, + dose_kernel.voxel_size_mm, + ), + ) + tia_kernel_grid = self._resample_tia_to_target_grid(target_img=kernel_ref_image) - # Resample CT to NM (Default using linear interpolator) + logger.info("Resampling CT to the kernel grid for density scaling.") resampled_ct = resample_to_target( source_img=self.ct_data.images[ref_time_id], - target_img=self.nm_data.images[ref_time_id], + target_img=kernel_ref_image, ) - dose_map_array = dose_kernel.tia_to_dose( - tia_mbq_s=self.tia_map.array_at(0) * self.toMBqs, + logger.info("Computing dose on the kernel grid.") + kernel_dose_array = dose_kernel.tia_to_dose( + tia_mbq_s=tia_kernel_grid * self.toMBqs, ct=( numpy.transpose( SimpleITK.GetArrayFromImage(resampled_ct), axes=(1, 2, 0) @@ -143,27 +185,225 @@ def apply_voxel_s(self) -> None: else None ), ) + kernel_dose_img = itk_image_from_array( + array=numpy.transpose(kernel_dose_array, axes=(2, 0, 1)), + ref_image=kernel_ref_image, + ) + logger.info( + "Resampling dose map from kernel grid to %s output grid.", output_grid + ) + dose_map_array = numpy.transpose( + SimpleITK.GetArrayFromImage( + resample_to_target( + source_img=kernel_dose_img, + target_img=output_ref_image, + default_value=0.0, + ) + ), + axes=(1, 2, 0), + ) # Create ITK Image Object and embed it into a LongitudinalStudy - # Clear dose outside patient body: - dose_map_array *= self.nm_data.masks[ref_time_id]["WholeBody"] + # Clear dose outside patient body on the chosen output grid when a mask exists. + if output_mask is not None: + logger.info("Applying WholeBody mask on the %s output grid.", output_grid) + dose_map_array *= output_mask + logger.info("Storing voxel-S dose map on the %s output grid.", output_grid) self.dose_map = LongitudinalStudy( images={ 0: itk_image_from_array( array=numpy.transpose(dose_map_array, axes=(2, 0, 1)), - ref_image=self.nm_data.images[ref_time_id], + ref_image=output_ref_image, ) }, meta={0: self.nm_data.meta[ref_time_id]}, ) - self.dose_map.masks[0] = self.nm_data.masks[0].copy() + if output_grid == "NM" and ref_time_id in self.nm_data.masks: + self.dose_map.masks[0] = self.nm_data.masks[ref_time_id].copy() + elif output_grid == "CT" and ref_time_id in self.ct_data.masks: + self.dose_map.masks[0] = self.ct_data.masks[ref_time_id].copy() return None + @staticmethod + def _build_reference_image_with_spacing( + ref_image: SimpleITK.Image, spacing_mm: Tuple[float, float, float] + ) -> SimpleITK.Image: + """Create a blank reference image on the same physical frame. + + Parameters + ---------- + ref_image : SimpleITK.Image + Source image providing origin, direction, and physical extent. + spacing_mm : tuple of float + Requested output spacing in millimeters. + + Returns + ------- + SimpleITK.Image + Blank image with the requested spacing and a size adjusted to preserve + the source physical coverage. + """ + original_spacing = ref_image.GetSpacing()[:3] + original_size = ref_image.GetSize()[:3] + new_size = [ + max( + 1, + int( + round(original_size[idx] * original_spacing[idx] / spacing_mm[idx]) + ), + ) + for idx in range(3) + ] + target_img = SimpleITK.Image(new_size, ref_image.GetPixelID()) + target_img.SetSpacing(spacing_mm) + target_img.SetOrigin(ref_image.GetOrigin()) + target_img.SetDirection(ref_image.GetDirection()) + return target_img + + @staticmethod + def _scalar_voxel_size_mm(image: SimpleITK.Image, image_name: str) -> float: + """Return a scalar voxel size for isotropic kernel selection. + + Parameters + ---------- + image : SimpleITK.Image + Image whose spacing is used for kernel selection. + image_name : str + Label used in warning messages when the image spacing is anisotropic. + + Returns + ------- + float + Mean voxel spacing in millimeters across the first three axes. + """ + spacing = tuple(float(value) for value in image.GetSpacing()[:3]) + mean_spacing = float(numpy.mean(spacing)) + if not numpy.allclose(spacing, mean_spacing, atol=0.1): + logger.warning( + "%s spacing %s mm is anisotropic. Kernel selection will use the " + "mean voxel size %.3f mm and resample to the chosen isotropic grid.", + image_name, + spacing, + mean_spacing, + ) + return mean_spacing + + def _resample_tia_to_target_grid( + self, target_img: SimpleITK.Image + ) -> numpy.ndarray: + """Resample the TIA map to a target grid while preserving total activity. + + Parameters + ---------- + target_img : SimpleITK.Image + Target image defining the output voxel grid. + + Returns + ------- + numpy.ndarray + Resampled TIA map on the target grid, expressed in MBq h per voxel. + """ + logger.info( + "Resampling TIA map to kernel grid using voxel-volume normalization to preserve total activity." + ) + tia_array = self.tia_map.array_at(0).astype(numpy.float64) + source_voxel_volume_ml = self._voxel_volume_ml(self.tia_map.images[0]) + target_voxel_volume_ml = self._voxel_volume_ml(target_img) + + tia_density = tia_array / source_voxel_volume_ml + tia_density_img = itk_image_from_array( + array=numpy.transpose(tia_density, axes=(2, 0, 1)), + ref_image=self.tia_map.images[0], + ) + resampled_density_img = resample_to_target( + source_img=tia_density_img, + target_img=target_img, + default_value=0.0, + ) + resampled_density = numpy.transpose( + SimpleITK.GetArrayFromImage(resampled_density_img), + axes=(1, 2, 0), + ).astype(numpy.float64) + resampled_tia = resampled_density * target_voxel_volume_ml + + source_total = float(numpy.sum(tia_array)) + resampled_total = float(numpy.sum(resampled_tia)) + logger.info( + "TIA totals before renormalization: source=%.6g MBq h, resampled=%.6g MBq h.", + source_total, + resampled_total, + ) + if source_total > 0.0 and resampled_total > 0.0: + resampled_tia *= source_total / resampled_total + logger.info("Renormalized resampled TIA map to preserve total activity.") + + return resampled_tia + + @staticmethod + def _voxel_volume_ml(image: SimpleITK.Image) -> float: + """Return the voxel volume in milliliters. + + Parameters + ---------- + image : SimpleITK.Image + Image whose spacing defines the voxel dimensions. + + Returns + ------- + float + Voxel volume in milliliters. + """ + spacing = image.GetSpacing()[:3] + return float(spacing[0] * spacing[1] * spacing[2] / 1000.0) + + def _resolve_output_grid_reference( + self, ref_time_id: int, output_grid: str + ) -> Tuple[SimpleITK.Image, Optional[numpy.ndarray]]: + """Return the reference image and optional body mask for an output grid. + + Parameters + ---------- + ref_time_id : int + Time point used as the reference for image and mask lookup. + output_grid : str + Output grid selector. Supported values are ``"NM"`` and ``"CT"``. + + Returns + ------- + tuple + Reference image and an optional whole-body mask on that grid. + + Raises + ------ + ValueError + If ``output_grid`` is not supported. + """ + if output_grid == "NM": + return ( + self.nm_data.images[ref_time_id], + self.nm_data.masks[ref_time_id].get("WholeBody"), + ) + if output_grid == "CT": + ct_mask = None + if ref_time_id in self.ct_data.masks: + ct_mask = self.ct_data.masks[ref_time_id].get("WholeBody") + return self.ct_data.images[ref_time_id], ct_mask + + raise ValueError( + f"VoxelSOutputGrid '{output_grid}' is not supported. Use 'NM' or 'CT'." + ) + def run_MC(self) -> None: # TODO: finish the code!!!!! - """Run MC.""" + """Run the Monte Carlo dosimetry workflow. + + Raises + ------ + NotImplementedError + Always raised because the Monte Carlo workflow is not implemented yet. + """ raise NotImplementedError("MC is not implemmented yet.") n_cpu = self.config["#CPU"] n_primaries = self.config["#primaries"] @@ -233,12 +473,23 @@ def run_MC(self) -> None: # TODO: finish the code!!!!! def compute_dose(self) -> None: """Compute dose by performing the following steps. - Compute TIA at the region level. - Get parameters of fit from region and compute TIA at the voxel level. - Convolve TIA map with Dose kernel and (optional) scale with CT density. + Computes TIA at the region level, propagates the fit to the voxel level, + and then applies the configured voxel-S or Monte Carlo dose engine. """ self.compute_tia() self.compute_voxel_tia() + + ref_time_id = self.config["ReferenceTimePoint"] + self.nm_data.save_image_to_nii_at( + time_id=ref_time_id, out_path=self.db_dir, name="_nm_ref" + ) + self.ct_data.save_image_to_nii_at( + time_id=ref_time_id, out_path=self.db_dir, name="_ct_ref" + ) + self.tia_map.save_image_to_nii_at( + time_id=0, out_path=self.db_dir, name="_tia_map" + ) + if self.config["Method"] == "Voxel-S-value": self.apply_voxel_s() elif self.config["Method"] == "Monte-Carlo": diff --git a/pytheranostics/imaging_ds/cycle_loader.py b/pytheranostics/imaging_ds/cycle_loader.py index 0dd7013..f98a995 100644 --- a/pytheranostics/imaging_ds/cycle_loader.py +++ b/pytheranostics/imaging_ds/cycle_loader.py @@ -10,7 +10,7 @@ import json import re from pathlib import Path -from typing import Dict, List, Optional, Tuple, Union +from typing import Any, Dict, List, Optional, Tuple, Union import pydicom import SimpleITK @@ -300,10 +300,11 @@ def _build_auto_mapping(mask_keys: List[str]) -> Dict[str, str]: def create_studies_with_masks( - storage_root: str | Path, - patient_id: str, - cycle_no: int, + storage_root: Optional[str | Path] = None, + patient_id: Optional[str] = None, + cycle_no: Optional[int] = None, *, + study_info: Optional[Dict[str, Any]] = None, calibration_factor: Optional[float] = None, parallel: bool = True, max_workers: Optional[int] = None, @@ -327,12 +328,18 @@ def create_studies_with_masks( Parameters ---------- - storage_root : str | Path - Base directory where organized data lives. - patient_id : str - Patient identifier. - cycle_no : int - Cycle number (1-based). + storage_root : str | Path, optional + Base directory where organized data lives. Used with patient_id + cycle_no + for the legacy folder-structured loading behavior. + patient_id : str, optional + Patient identifier used with storage_root. + cycle_no : int, optional + Cycle number (1-based), used with storage_root. + study_info : dict, optional + Study dictionary returned by auto_setup_dosimetry_study() or + auto_setup_dosimetry_study_inventory(). If provided, CT/SPECT/RTSTRUCT + paths are loaded from this dictionary and storage_root/patient_id/cycle_no + are not required. calibration_factor : float, optional Optional SPECT calibration factor (Bq per count/sec) applied during image load. parallel : bool @@ -385,9 +392,89 @@ def create_studies_with_masks( ) # 1) Discover paths and injection metadata - ct_paths, spect_paths, rtstruct_files, inj = prepare_cycle_inputs( - storage_root, patient_id, cycle_no - ) + if study_info is not None: + tp_items = study_info.get("time_points", []) + if tp_items: + ct_paths = [ + Path(tp["ct_path"]) if tp.get("ct_path") else None for tp in tp_items + ] + spect_paths = [ + Path(tp["spect_path"]) if tp.get("spect_path") else None + for tp in tp_items + ] + rtstruct_files = [ + Path(tp["rtstruct_file"]) if tp.get("rtstruct_file") else None + for tp in tp_items + ] + first_tp_with_inj = next( + ( + tp + for tp in tp_items + if isinstance(tp.get("injection_info"), dict) + and tp.get("injection_info") + ), + None, + ) + inj_info = ( + first_tp_with_inj.get("injection_info", {}) + if first_tp_with_inj is not None + else {} + ) + else: + ct_paths = [Path(p) if p else None for p in study_info.get("ct_paths", [])] + spect_paths = [ + Path(p) if p else None for p in study_info.get("spect_paths", []) + ] + rtstruct_files = [ + Path(p) if p else None for p in study_info.get("rtstruct_files", []) + ] + inj_info = {} + + max_len = max(len(ct_paths), len(spect_paths), len(rtstruct_files), 0) + ct_paths.extend([None] * (max_len - len(ct_paths))) + spect_paths.extend([None] * (max_len - len(spect_paths))) + rtstruct_files.extend([None] * (max_len - len(rtstruct_files))) + + weight_g = inj_info.get("PatientWeight_g") + if weight_g is None: + weight_g = inj_info.get("patient_weight_g") + if weight_g is None and inj_info.get("patient_weight_kg") is not None: + try: + weight_g = int(round(float(inj_info["patient_weight_kg"]) * 1000.0)) + except Exception: + weight_g = None + + injected_activity = ( + inj_info.get("InjectedActivity") + if "InjectedActivity" in inj_info + else inj_info.get("injected_activity") + ) + if injected_activity is not None: + try: + injected_activity = int(round(float(injected_activity))) + except Exception: + pass + + inj = { + "InjectionDate": inj_info.get("InjectionDate") + or inj_info.get("injection_date") + or "", + "InjectionTime": _extract_hhmmss( + inj_info.get("InjectionTime") or inj_info.get("injection_time") + ) + or "", + "InjectedActivity": injected_activity, + "PatientWeight_g": weight_g, + "PatientHeight_cm": inj_info.get("PatientHeight_cm"), + } + else: + if storage_root is None or patient_id is None or cycle_no is None: + raise ValueError( + "Either provide study_info, or provide storage_root + patient_id + cycle_no." + ) + ct_paths, spect_paths, rtstruct_files, inj = prepare_cycle_inputs( + storage_root, patient_id, cycle_no + ) # 2) Build longitudinal studies from DICOM once ct_dirs = [str(p) for p in ct_paths if p is not None] @@ -422,6 +509,11 @@ def create_studies_with_masks( # Build NM masks only if target available target_img = next(iter(longSPECT.images.values()), None) + if target_img is None: + raise RuntimeError( + f"No SPECT image available for timepoint {time_id} to resample NM masks" + ) + ct_masks, nm_masks = load_and_resample_RT_to_target( ref_dicom_ct_dir=str(ct_dir), rt_struct_file=str(rt_file), @@ -483,6 +575,7 @@ def _is_valid_target(name: str) -> bool: if final_spect_mapping is not None: for k in nm_masks.keys(): dst = final_spect_mapping.get(k) + if dst is not None and _is_valid_target(dst): spect_map_valid[k] = dst else: diff --git a/pytheranostics/imaging_ds/dicom_ingest.py b/pytheranostics/imaging_ds/dicom_ingest.py index 48305e1..5f2f69e 100644 --- a/pytheranostics/imaging_ds/dicom_ingest.py +++ b/pytheranostics/imaging_ds/dicom_ingest.py @@ -5,14 +5,31 @@ """ import logging +import os +from datetime import datetime from pathlib import Path -from typing import Dict, List, Optional, Tuple +from typing import Any, Dict, Iterable, List, Optional, Tuple import pydicom +from pydicom.misc import is_dicom logger = logging.getLogger(__name__) +def _split_dicom_datetime(dt_val: Optional[str]) -> Tuple[Optional[str], Optional[str]]: + """Split a DICOM DT value into date (YYYYMMDD) and time (HHMMSS).""" + if not dt_val: + return None, None + val = str(dt_val).split(".")[0] + if len(val) < 8: + return None, None + date_part = val[:8] + time_part = val[8:14] if len(val) > 8 else None + if time_part: + time_part = time_part.ljust(6, "0") + return date_part, time_part + + class DosimetryStudyOrganizer: """ Organize DICOM studies for dosimetry analysis. @@ -178,17 +195,26 @@ def _extract_injection_info(self, ds: pydicom.Dataset) -> Dict: rp_info, "RadionuclideTotalDose", None ) - # Extract date and time - inj_date = getattr(rp_info, "RadiopharmaceuticalStartDate", None) - inj_time = getattr(rp_info, "RadiopharmaceuticalStartTime", None) - + inj_dt = getattr(rp_info, "RadiopharmaceuticalStartDateTime", None) + inj_date, inj_time = _split_dicom_datetime(inj_dt) if inj_date: info["injection_date"] = inj_date if inj_time: - # Format time to HHMMSS - info["injection_time"] = inj_time.split(".")[ - 0 - ] # Remove fractional seconds + info["injection_time"] = inj_time + + # Backward-compatible fallback when DateTime is not available. + if not info["injection_date"]: + inj_date_legacy = getattr( + rp_info, "RadiopharmaceuticalStartDate", None + ) + if inj_date_legacy: + info["injection_date"] = inj_date_legacy + if not info["injection_time"]: + inj_time_legacy = getattr( + rp_info, "RadiopharmaceuticalStartTime", None + ) + if inj_time_legacy: + info["injection_time"] = inj_time_legacy.split(".")[0] return info @@ -282,6 +308,338 @@ def auto_setup_dosimetry_study( ) +def auto_setup_dosimetry_study_inventory( + base_dir: Path, patient_id: Optional[str] = None +) -> Tuple[Dict[str, Any], List[str], List[str], List[str]]: + """ + Build a DICOM inventory of SPECT (NM/OT/PT), CT, and RTSTRUCT series. + + Parameters + ---------- + base_dir : Path + Base directory containing DICOM files. + patient_id : str, optional + Patient ID override. If None, the first detected PatientID is used. + + Returns + ------- + tuple + (study_info, ct_paths, spect_paths, rtstruct_files) + """ + + def _iter_dicom_files(root: Path) -> Iterable[Path]: + for path in root.rglob("*"): + if not path.is_file(): + continue + if path.suffix.lower() == ".dcm" or is_dicom(str(path)): + yield path + + def _parse_datetime( + date_str: Optional[str], time_str: Optional[str] + ) -> Optional[datetime]: + if not date_str: + return None + time_val = (time_str or "000000").split(".")[0] + if len(time_val) < 6: + time_val = time_val.ljust(6, "0") + try: + return datetime.strptime(f"{date_str}{time_val}", "%Y%m%d%H%M%S") + except ValueError: + return None + + def _dicom_datetime(ds: pydicom.Dataset) -> Optional[datetime]: + for date_key, time_key in [ + ("AcquisitionDate", "AcquisitionTime"), + ("SeriesDate", "SeriesTime"), + ("ContentDate", "ContentTime"), + ("StudyDate", "StudyTime"), + ]: + date_val = getattr(ds, date_key, None) + time_val = getattr(ds, time_key, None) + dt = _parse_datetime(date_val, time_val) + if dt: + return dt + return None + + def _common_parent(files: List[Path]) -> Optional[str]: + if not files: + return None + common = Path(os.path.commonpath([str(p) for p in files])) + if common.is_file(): + return str(common.parent) + return str(common) + + def _select_best_match( + target: Dict[str, Any], candidates: List[Dict[str, Any]] + ) -> Optional[Dict[str, Any]]: + if not candidates: + return None + target_study = target.get("study_uid") + filtered = [c for c in candidates if c.get("study_uid") == target_study] + if filtered: + candidates = filtered + target_dt = target.get("series_datetime") + dated = [c for c in candidates if c.get("series_datetime")] + if target_dt and dated: + return min( + dated, + key=lambda c: abs((c["series_datetime"] - target_dt).total_seconds()), + ) + return candidates[0] + + def _extract_injection_info(ds: pydicom.Dataset) -> Dict[str, Any]: + info: Dict[str, Any] = { + "patient_weight_kg": getattr(ds, "PatientWeight", None), + "injection_date": None, + "injection_time": None, + "injected_activity": None, + "radiopharmaceutical": None, + } + + if info["patient_weight_kg"]: + info["patient_weight_g"] = int(info["patient_weight_kg"] * 1000) + + if hasattr(ds, "RadiopharmaceuticalInformationSequence"): + rp_seq = ds.RadiopharmaceuticalInformationSequence + if len(rp_seq) > 0: + rp_info = rp_seq[0] + info["radiopharmaceutical"] = getattr( + rp_info, "Radiopharmaceutical", None + ) + info["injected_activity"] = getattr( + rp_info, "RadionuclideTotalDose", None + ) + + inj_dt = getattr(rp_info, "RadiopharmaceuticalStartDateTime", None) + inj_date, inj_time = _split_dicom_datetime(inj_dt) + if inj_date: + info["injection_date"] = inj_date + if inj_time: + info["injection_time"] = inj_time + + # Backward-compatible fallback when DateTime is not available. + if not info["injection_date"]: + inj_date_legacy = getattr( + rp_info, "RadiopharmaceuticalStartDate", None + ) + if inj_date_legacy: + info["injection_date"] = inj_date_legacy + if not info["injection_time"]: + inj_time_legacy = getattr( + rp_info, "RadiopharmaceuticalStartTime", None + ) + if inj_time_legacy: + info["injection_time"] = inj_time_legacy.split(".")[0] + + return info + + series_map: Dict[str, Dict[str, Any]] = {} + detected_patient_id: Optional[str] = None + + tag_list = [ + # Core indexing/matching fields + "Modality", + "SeriesInstanceUID", + "StudyInstanceUID", + "SOPClassUID", + "SOPInstanceUID", + "StudyDate", + "StudyTime", + "SeriesDate", + "SeriesTime", + "AcquisitionDate", + "AcquisitionTime", + "ContentDate", + "ContentTime", + "PatientID", + "PatientName", + "PatientSex", + "PatientBirthDate", + "PatientWeight", + # Common geometric/acquisition fields + "FrameOfReferenceUID", + "ImagePositionPatient", + "ImageOrientationPatient", + "PixelSpacing", + "SliceThickness", + "KVP", + "ConvolutionKernel", + "RescaleIntercept", + "RescaleSlope", + # NM/PT quantitative + injection fields + "SeriesDescription", + "AcquisitionDuration", + "Units", + "DecayCorrection", + "CorrectedImage", + "CountsSource", + "NumberOfSlices", + "RadiopharmaceuticalInformationSequence", + "RadiopharmaceuticalStartDateTime", + # RTSTRUCT linkage/content fields + "StructureSetLabel", + "StructureSetDate", + "StructureSetTime", + "ReferencedFrameOfReferenceSequence", + "StructureSetROISequence", + "ROIContourSequence", + "RTROIObservationsSequence", + ] + + for dcm_path in _iter_dicom_files(Path(base_dir)): + try: + ds = pydicom.dcmread( + dcm_path, stop_before_pixels=True, force=True, specific_tags=tag_list + ) + except Exception as exc: + logger.warning(f"Skipping unreadable DICOM: {dcm_path} ({exc})") + continue + + modality = getattr(ds, "Modality", None) + if modality not in {"CT", "RTSTRUCT", "NM", "OT", "PT"}: + continue + + series_uid = getattr(ds, "SeriesInstanceUID", None) + study_uid = getattr(ds, "StudyInstanceUID", None) + series_key = series_uid or f"{study_uid}|{modality}|{dcm_path.parent}" + + if series_key not in series_map: + series_map[series_key] = { + "modality": modality, + "study_uid": study_uid, + "series_uid": series_uid, + "files": [], + "series_datetime": _dicom_datetime(ds), + "patient_id": getattr(ds, "PatientID", None), + "representative_ds": ds, + } + + series_map[series_key]["files"].append(dcm_path) + + if not detected_patient_id: + detected_patient_id = getattr(ds, "PatientID", None) + + spect_series = [ + s for s in series_map.values() if s["modality"] in {"NM", "OT", "PT"} + ] + ct_series = [s for s in series_map.values() if s["modality"] == "CT"] + rt_series = [s for s in series_map.values() if s["modality"] == "RTSTRUCT"] + + logger.info( + "DICOM inventory: %d SPECT series, %d CT series, %d RTSTRUCT series", + len(spect_series), + len(ct_series), + len(rt_series), + ) + + for series in spect_series: + logger.info( + "SPECT series: study_uid=%s series_uid=%s datetime=%s files=%d root=%s", + series.get("study_uid"), + series.get("series_uid"), + series.get("series_datetime"), + len(series.get("files", [])), + _common_parent(series.get("files", [])) or "UNKNOWN", + ) + + for series in ct_series: + logger.info( + "CT series: study_uid=%s series_uid=%s datetime=%s files=%d root=%s", + series.get("study_uid"), + series.get("series_uid"), + series.get("series_datetime"), + len(series.get("files", [])), + _common_parent(series.get("files", [])) or "UNKNOWN", + ) + + for series in rt_series: + logger.info( + "RTSTRUCT series: study_uid=%s series_uid=%s datetime=%s files=%d root=%s", + series.get("study_uid"), + series.get("series_uid"), + series.get("series_datetime"), + len(series.get("files", [])), + _common_parent(series.get("files", [])) or "UNKNOWN", + ) + + time_points: List[Dict[str, Any]] = [] + ct_paths: List[str] = [] + spect_paths: List[str] = [] + rtstruct_files: List[str] = [] + + def _spect_sort_key(series: Dict[str, Any]) -> Tuple[int, datetime]: + dt_val = series.get("series_datetime") + if dt_val: + return (0, dt_val) + return (1, datetime.max) + + for idx, spect in enumerate(sorted(spect_series, key=_spect_sort_key)): + ct_match = _select_best_match(spect, ct_series) + rt_match = _select_best_match(spect, rt_series) + + spect_path = _common_parent(spect["files"]) + ct_path = _common_parent(ct_match["files"]) if ct_match else None + rt_file = str(rt_match["files"][0]) if rt_match and rt_match["files"] else None + + logger.info( + "Time point %s: SPECT=%s CT=%s RTSTRUCT=%s", + f"tp{idx + 1}", + spect_path or "NONE", + ct_path or "NONE", + rt_file or "NONE", + ) + if ct_match: + logger.info( + "Time point %s CT match: study_uid=%s series_uid=%s datetime=%s", + f"tp{idx + 1}", + ct_match.get("study_uid"), + ct_match.get("series_uid"), + ct_match.get("series_datetime"), + ) + if rt_match: + logger.info( + "Time point %s RTSTRUCT match: study_uid=%s series_uid=%s datetime=%s", + f"tp{idx + 1}", + rt_match.get("study_uid"), + rt_match.get("series_uid"), + rt_match.get("series_datetime"), + ) + + tp_info: Dict[str, Any] = { + "name": f"tp{idx + 1}", + "spect_path": spect_path, + "ct_path": ct_path, + "rtstruct_file": rt_file, + "series_datetime": spect.get("series_datetime"), + "study_uid": spect.get("study_uid"), + "series_uid": spect.get("series_uid"), + "patient_id": spect.get("patient_id"), + "injection_info": ( + _extract_injection_info(spect["representative_ds"]) + if spect.get("representative_ds") + else {} + ), + } + + time_points.append(tp_info) + if ct_path: + ct_paths.append(ct_path) + if spect_path: + spect_paths.append(spect_path) + if rt_file: + rtstruct_files.append(rt_file) + + study_info: Dict[str, Any] = { + "patient_id": patient_id or detected_patient_id, + "time_points": time_points, + "ct_paths": ct_paths, + "spect_paths": spect_paths, + "rtstruct_files": rtstruct_files, + } + + return study_info, ct_paths, spect_paths, rtstruct_files + + def extract_patient_metadata(dicom_dir: Path) -> Dict: """ Extract patient metadata from the first DICOM file in a directory. @@ -320,15 +678,20 @@ def extract_patient_metadata(dicom_dir: Path) -> Dict: metadata["injected_activity"] = getattr( rp_info, "RadionuclideTotalDose", None ) - metadata["injection_date"] = getattr( - rp_info, "RadiopharmaceuticalStartDate", None - ) - metadata["injection_time"] = getattr( - rp_info, "RadiopharmaceuticalStartTime", None - ) - if metadata["injection_time"]: - metadata["injection_time"] = metadata["injection_time"].split(".")[ - 0 - ] + inj_dt = getattr(rp_info, "RadiopharmaceuticalStartDateTime", None) + inj_date, inj_time = _split_dicom_datetime(inj_dt) + metadata["injection_date"] = inj_date + metadata["injection_time"] = inj_time + + if not metadata["injection_date"]: + metadata["injection_date"] = getattr( + rp_info, "RadiopharmaceuticalStartDate", None + ) + if not metadata["injection_time"]: + inj_time_legacy = getattr( + rp_info, "RadiopharmaceuticalStartTime", None + ) + if inj_time_legacy: + metadata["injection_time"] = inj_time_legacy.split(".")[0] return metadata diff --git a/pytheranostics/imaging_ds/longitudinal_study.py b/pytheranostics/imaging_ds/longitudinal_study.py index e5cfbdb..e078b8a 100644 --- a/pytheranostics/imaging_ds/longitudinal_study.py +++ b/pytheranostics/imaging_ds/longitudinal_study.py @@ -800,10 +800,12 @@ def activity_in(self, region: str, time_id: int) -> float: raise AssertionError( "Can't compute activity if the image data does not represent the distribution of a radionuclide" ) - return numpy.sum( - self.masks[time_id][region] - * self.array_at(time_id=time_id) - * self.voxel_volume(time_id=time_id) + return float( + numpy.sum( + self.masks[time_id][region] + * self.array_at(time_id=time_id) + * self.voxel_volume(time_id=time_id) + ) ) def density_of(self, region: str, time_id: int) -> float: @@ -971,7 +973,7 @@ def save_image_to_nii_at( """ logger.info(f"Writing Image ({name}) into nifty file.") SimpleITK.WriteImage( - image=SimpleITK.Cast(self.images[time_id], SimpleITK.sitkInt32), + image=self.images[time_id], fileName=out_path / f"Image_{time_id}{name}.nii.gz", ) return None diff --git a/pytheranostics/imaging_tools/tools.py b/pytheranostics/imaging_tools/tools.py index 5f43e14..8988ee8 100644 --- a/pytheranostics/imaging_tools/tools.py +++ b/pytheranostics/imaging_tools/tools.py @@ -4,6 +4,8 @@ import glob import logging +import math +from datetime import datetime from pathlib import Path from typing import TYPE_CHECKING, Dict, List, Optional, Tuple @@ -184,64 +186,203 @@ def itk_image_from_array( return image +def _parse_dicom_date_time( + date_val: Optional[str], time_val: Optional[str] +) -> Optional[datetime]: + """Parse DICOM DA/TM values into a Python datetime.""" + if not date_val: + return None + + date_str = str(date_val) + time_str = str(time_val or "000000").split(".")[0] + if len(time_str) < 6: + time_str = time_str.ljust(6, "0") + + try: + return datetime.strptime(f"{date_str}{time_str[:6]}", "%Y%m%d%H%M%S") + except ValueError: + return None + + +def _parse_dicom_datetime(dt_val: Optional[str]) -> Optional[datetime]: + """Parse a DICOM DT value into a Python datetime.""" + if not dt_val: + return None + + value = str(dt_val).split(".")[0] + if len(value) < 8: + return None + + return _parse_dicom_date_time(value[:8], value[8:14] if len(value) > 8 else None) + + +def _get_acquisition_start_datetime(ds: pydicom.Dataset) -> Optional[datetime]: + """Return the best available acquisition-start datetime from a DICOM dataset.""" + for date_key, time_key in [ + ("AcquisitionDate", "AcquisitionTime"), + ("SeriesDate", "SeriesTime"), + ("ContentDate", "ContentTime"), + ("StudyDate", "StudyTime"), + ]: + dt = _parse_dicom_date_time( + getattr(ds, date_key, None), getattr(ds, time_key, None) + ) + if dt is not None: + return dt + return None + + +def _get_radiopharm_admin_datetime_and_half_life( + ds: pydicom.Dataset, +) -> Tuple[Optional[datetime], Optional[float]]: + """Return radiopharmaceutical administration datetime and half-life in seconds.""" + if not hasattr(ds, "RadiopharmaceuticalInformationSequence"): + return None, None + + rp_seq = ds.RadiopharmaceuticalInformationSequence + if len(rp_seq) == 0: + return None, None + + rp_info = rp_seq[0] + admin_dt = _parse_dicom_datetime( + getattr(rp_info, "RadiopharmaceuticalStartDateTime", None) + ) + if admin_dt is None: + admin_date = getattr(rp_info, "RadiopharmaceuticalStartDate", None) + if admin_date is None: + admin_date = getattr(ds, "StudyDate", None) + admin_dt = _parse_dicom_date_time( + admin_date, + getattr(rp_info, "RadiopharmaceuticalStartTime", None), + ) + + half_life_seconds = None + half_life_val = getattr(rp_info, "RadionuclideHalfLife", None) + if half_life_val is not None: + try: + half_life_seconds = float(half_life_val) + except (TypeError, ValueError): + half_life_seconds = None + + return admin_dt, half_life_seconds + + def apply_qspect_dcm_scaling( image: Image, dir: str, scale_factor: Optional[Tuple[float, float]] = None ) -> Image: - """Read dicom metadata to extract appropriate scaling for Image voxel values. + """Read DICOM metadata to scale quantitative NM/PT images and harmonize decay reference. - Apply scaling to original image and generate a new SimpleITK image object. + NM images are converted using the Real World Value Mapping when available. NM and PT + images are then optionally converted from ADMIN-referenced decay correction to START. + """ + path_dir = Path(dir) + dicom_files = sorted(path_dir.glob("*.dcm")) + if len(dicom_files) == 0: + raise AssertionError(f"No Dicom data was found under {dir}") - Args: - image (Image): Original SimpleITK image. - dir (str): Directory path containing DICOM files. - scale_factor (Optional[Tuple[float, float]], optional): Custom scale factor. Defaults to None. + dcm_data = pydicom.dcmread(str(dicom_files[0]), stop_before_pixels=True, force=True) + modality = getattr(dcm_data, "Modality", None) - Raises - ------ - AssertionError - If wrong modality or multiple DICOM files found. + if modality not in ["NM", "PT"]: + raise AssertionError( + f"Wrong Modality, expecting NM/PT quantitative data, but got {modality}" + ) - Returns - ------- - Image - Scaled SimpleITK image. - """ - if scale_factor is None: - # We use pydicom to access the appropriate tag: - # First, find the SPECT dicom file: - path_dir = Path(dir) - nm_files = [files for files in path_dir.glob("*.dcm")] - dcm_data = pydicom.dcmread(str(nm_files[0])) - - if ( - dcm_data.Modality == "PT" - ): # PET modality stores individual dicom files for each slice, similar to CT. - # Its scaling is readed correctly from SimpleITK, so nothing to do here. - return image - - if dcm_data.Modality != "NM": - raise AssertionError( - f"Wrong Modality, expecting NM for SPECT data, but got {dcm_data.Modality}" - ) + if modality == "NM" and scale_factor is None and len(dicom_files) != 1: + raise AssertionError( + f"Found more than 1 .dcm file inside {path_dir.name}, not sure which one is the right SPECT." + ) - if len(nm_files) != 1: - raise AssertionError( - f"Found more than 1 .dcm file inside {path_dir.name}, not sure which one is the right SPECT." - ) + image_array = numpy.squeeze(SimpleITK.GetArrayFromImage(image)).astype( + numpy.float32 + ) - # If scale_factor is not provided by the user, we assume it is a Q-SPECT file. - slope = dcm_data.RealWorldValueMappingSequence[0].RealWorldValueSlope - intercept = dcm_data.RealWorldValueMappingSequence[0].RealWorldValueIntercept + if scale_factor is not None: + slope = float(scale_factor[0]) + intercept = float(scale_factor[1]) + logger.info( + "Applying user-provided quantitative scaling for %s: slope=%s intercept=%s.", + modality, + slope, + intercept, + ) + image_array = slope * image_array + intercept + elif modality == "NM": + slope = float(dcm_data.RealWorldValueMappingSequence[0].RealWorldValueSlope) + intercept = float( + dcm_data.RealWorldValueMappingSequence[0].RealWorldValueIntercept + ) + logger.info( + "Applying DICOM quantitative scaling for NM: slope=%s intercept=%s.", + slope, + intercept, + ) + image_array = slope * image_array + intercept else: - slope = scale_factor[0] - intercept = scale_factor[1] + logger.info( + "PT quantitative values are assumed to be scaled correctly by SimpleITK; no slope/intercept scaling applied." + ) + + decay_correction = str(getattr(dcm_data, "DecayCorrection", "") or "").upper() + logger.info( + "DICOM DecayCorrection for %s image is '%s'.", + modality, + decay_correction or "MISSING", + ) + + if decay_correction == "START": + logger.info( + "No additional decay normalization is needed because the image is already referenced to acquisition start." + ) + elif decay_correction == "ADMIN": + acquisition_dt = _get_acquisition_start_datetime(dcm_data) + admin_dt, half_life_seconds = _get_radiopharm_admin_datetime_and_half_life( + dcm_data + ) - # Re-scale voxel values - image_array = numpy.squeeze(SimpleITK.GetArrayFromImage(image)) - image_array = slope * image_array.astype(numpy.float32) + intercept + if acquisition_dt is None: + logger.warning( + "DecayCorrection is ADMIN but acquisition start time could not be resolved. Skipping decay normalization to START." + ) + elif admin_dt is None: + logger.warning( + "DecayCorrection is ADMIN but radiopharmaceutical administration time could not be resolved. Skipping decay normalization to START." + ) + elif half_life_seconds is None or half_life_seconds <= 0: + logger.warning( + "DecayCorrection is ADMIN but RadionuclideHalfLife is missing or invalid (%s). Skipping decay normalization to START.", + half_life_seconds, + ) + else: + delta_seconds = (acquisition_dt - admin_dt).total_seconds() + if delta_seconds < 0: + logger.warning( + "DecayCorrection is ADMIN but acquisition start (%s) is earlier than administration time (%s). Skipping decay normalization to START.", + acquisition_dt.isoformat(), + admin_dt.isoformat(), + ) + else: + decay_factor = math.exp( + -math.log(2.0) * delta_seconds / half_life_seconds + ) + logger.info( + "Converting %s image from ADMIN to START decay reference using administration time %s, acquisition time %s, half-life %.6g s, elapsed time %.3f s, factor %.9g.", + modality, + admin_dt.isoformat(), + acquisition_dt.isoformat(), + half_life_seconds, + delta_seconds, + decay_factor, + ) + image_array *= decay_factor + elif decay_correction: + logger.info( + "DecayCorrection '%s' is not explicitly handled. Image values are left unchanged.", + decay_correction, + ) + else: + logger.info("DecayCorrection tag is missing. Image values are left unchanged.") - # Generate re-scaled SimpleITK.Image object by building a new Image from re_scaled array, and copying - # metadata from original image. return itk_image_from_array(array=image_array, ref_image=image) diff --git a/tests/data/voxel_dosimetry_validation/README.md b/tests/data/voxel_dosimetry_validation/README.md new file mode 100644 index 0000000..3606e7c --- /dev/null +++ b/tests/data/voxel_dosimetry_validation/README.md @@ -0,0 +1,17 @@ +# Voxel Dosimetry Validation Assets + +This directory stores optional assets for the slow voxel dosimetry validation test: + +- `rtstructs/`: precomputed RT-STRUCT DICOM files for the SNMMI dosimetry dataset +- `expected_results.csv`: reference subset of `VoxelSDosimetry.results` +- `expected_df_ad.csv`: reference subset of `VoxelSDosimetry.df_ad` +- `voi_mappings_config.json` (optional): custom ROI mapping overrides for the validation run +- `dosimetry_fit_defaults.json` (optional): custom ROI fit defaults for the validation run + +The validation test is implemented in `tests/test_voxel_dosimetry_validation.py`. +It will skip automatically unless the RT-STRUCT directory and both CSV files are present. + +Recommended contents of the CSV files: +- Include only stable, comparable columns. +- Avoid object/list-valued columns such as raw fit-parameter arrays unless you serialize and compare them intentionally. +- Keep the index as ROI names so it matches the DataFrame produced by the pipeline. diff --git a/tests/data/voxel_dosimetry_validation/dosimetry_fit_defaults.json b/tests/data/voxel_dosimetry_validation/dosimetry_fit_defaults.json new file mode 100644 index 0000000..802680a --- /dev/null +++ b/tests/data/voxel_dosimetry_validation/dosimetry_fit_defaults.json @@ -0,0 +1,11 @@ +{ + "_description": "Apply bi-exponential fitting with uptake phase to all organs by default. Users can override these defaults for specific organs or ROIs by providing a custom configuration file.", + "organ_defaults": { + "fit_order": 2, + "with_uptake": true, + "param_init": {"A1": 100, "A2": 0.01}, + "fixed_parameters": null, + "bounds": {"A2": [0.004345, 1.0]}, + "washout_ratio": null + } +} diff --git a/tests/data/voxel_dosimetry_validation/expected_df_ad.csv b/tests/data/voxel_dosimetry_validation/expected_df_ad.csv new file mode 100644 index 0000000..9c4fb54 --- /dev/null +++ b/tests/data/voxel_dosimetry_validation/expected_df_ad.csv @@ -0,0 +1,7 @@ +,AD[Gy],AD[Gy/GBq] +Kidney_Left,2.741417121338495,0.3802018457101281 +Kidney_Right,2.2888928278245335,0.31744212546052364 +Liver,2.347015734546143,0.3255030791335655 +Spleen,3.017681578796798,0.41851642973026076 +RemainderOfBody,0.037553347066800524,0.005208201173128821 +WholeBody,0.10660231643052528,0.014784469371108286 diff --git a/tests/data/voxel_dosimetry_validation/expected_results.csv b/tests/data/voxel_dosimetry_validation/expected_results.csv new file mode 100644 index 0000000..7a6ca88 --- /dev/null +++ b/tests/data/voxel_dosimetry_validation/expected_results.csv @@ -0,0 +1,7 @@ +,Time_hr,Volume_CT_mL,Activity_MBq,Density_HU,Fit_params,R_squared_AIC,TIA_MBq_h,TIA_h,Lambda_eff +Kidney_Left,"[3.741111111111111, 27.725277777777777, 103.09416666666667, 123.98361111111112]","[np.float64(233.60824584960938), np.float64(234.09175872802734), np.float64(247.64728546142578), np.float64(248.5828399658203)]","[98.561432, 73.599408, 25.39568, 19.046609999999998]","[20.60774996325871, 21.191136749734177, 22.187848750563198, 20.703438989020096]","[108.74206751059795, 0.01408142600216577, -108.74206751059795, 0.8454291282326525]","[np.float64(0.999997766586273), np.float64(-18.02683458426659)]",7593.752576123306,1.0531628780002906,"[0.01408142600216577, 0.8454291282326525]" +Kidney_Right,"[3.741111111111111, 27.725277777777777, 103.09416666666667, 123.98361111111112]","[np.float64(250.99468231201172), np.float64(226.09806060791016), np.float64(240.1714324951172), np.float64(231.58836364746094)]","[92.891448, 66.860492, 23.209494, 15.666981999999999]","[21.532457910155138, 21.68748655522796, 22.7863626617111, 20.896461838756704]","[100.10353294428627, 0.014525034301202456, -100.10353294428627, 1.0572026378865176]","[np.float64(0.9996469559443484), np.float64(1.8552701404453744)]",6797.106045193271,0.9426775092905898,"[0.014525034301202456, 1.0572026378865176]" +Liver,"[3.741111111111111, 27.725277777777777, 103.09416666666667, 123.98361111111112]","[np.float64(2037.3516082763672), np.float64(2166.8357849121094), np.float64(2008.5783004760742), np.float64(2248.795509338379)]","[378.4744, 347.40441599999997, 192.66407999999998, 173.225408]","[51.76884621109779, 52.70464004098426, 54.272499497898295, 52.29396916837042]","[426.6982953753453, 0.0074761967214427676, -426.6982953753453, 0.6575597175063645]","[np.float64(0.9987334394598945), np.float64(15.394510161868892)]",56425.334754817464,7.825520695704643,"[0.0074761967214427676, 0.6575597175063645]" +Spleen,"[3.741111111111111, 27.725277777777777, 103.09416666666667, 123.98361111111112]","[np.float64(268.9018249511719), np.float64(273.8170623779297), np.float64(233.95729064941406), np.float64(235.11028289794922)]","[87.20683199999999, 77.633392, 34.655491999999995, 27.94343]","[41.40656253989871, 40.55778112135081, 39.93342627240932, 39.334509655986466]","[104.30127630893381, 0.01065558561011114, -104.30127630893381, 0.5562306512948885]","[np.float64(0.999990386007488), np.float64(-14.177557942224823)]",9600.898902876934,1.3315301254002192,"[0.01065558561011114, 0.5562306512948885]" +RemainderOfBody,"[3.741111111111111, 27.725277777777777, 103.09416666666667, 123.98361111111112]","[np.float64(94709.14363861084), np.float64(94599.15733337402), np.float64(94769.64569091797), np.float64(94535.92300415039)]","[671.643776, 317.521728, 150.297136, 121.737264]","[-689.8872685442167, -700.6898718764443, -695.1787159142423, -688.035938043804]","[668.6311458174671, 0.017797843153233038, -668.6311458174671, 36.97507466187187]","[np.float64(0.9240237820407323), np.float64(38.79977783207139)]",37550.016394303595,5.207739248593175,"[0.017797843153233038, 36.97507466187187]" +WholeBody,"[3.741111111111111, 27.725277777777777, 103.09416666666667, 123.98361111111112]","[np.float64(97500.0), np.float64(97500.0), np.float64(97500.0), np.float64(97500.0)]","[1328.7776, 883.018816, 426.22236799999996, 357.619616]","[-668.8390573941745, -678.4564056396484, -674.3848197350135, -665.7156941047082]","[1332.422362995897, 0.011439505602835902, -1332.422362995897, 32.95741948285121]","[np.float64(0.9806421746620866), np.float64(37.97065402607848)]",116435.09134326481,16.148157932464656,"[0.011439505602835902, 32.95741948285121]" diff --git a/tests/data/voxel_dosimetry_validation/rtstructs/rtstruct_scan1.dcm b/tests/data/voxel_dosimetry_validation/rtstructs/rtstruct_scan1.dcm new file mode 100644 index 0000000..00f1d89 Binary files /dev/null and b/tests/data/voxel_dosimetry_validation/rtstructs/rtstruct_scan1.dcm differ diff --git a/tests/data/voxel_dosimetry_validation/rtstructs/rtstruct_scan2.dcm b/tests/data/voxel_dosimetry_validation/rtstructs/rtstruct_scan2.dcm new file mode 100644 index 0000000..3ef05ca Binary files /dev/null and b/tests/data/voxel_dosimetry_validation/rtstructs/rtstruct_scan2.dcm differ diff --git a/tests/data/voxel_dosimetry_validation/rtstructs/rtstruct_scan3.dcm b/tests/data/voxel_dosimetry_validation/rtstructs/rtstruct_scan3.dcm new file mode 100644 index 0000000..e90c259 Binary files /dev/null and b/tests/data/voxel_dosimetry_validation/rtstructs/rtstruct_scan3.dcm differ diff --git a/tests/data/voxel_dosimetry_validation/rtstructs/rtstruct_scan4.dcm b/tests/data/voxel_dosimetry_validation/rtstructs/rtstruct_scan4.dcm new file mode 100644 index 0000000..577abcb Binary files /dev/null and b/tests/data/voxel_dosimetry_validation/rtstructs/rtstruct_scan4.dcm differ diff --git a/tests/data/voxel_dosimetry_validation/voi_mappings_config.json b/tests/data/voxel_dosimetry_validation/voi_mappings_config.json new file mode 100644 index 0000000..d51989d --- /dev/null +++ b/tests/data/voxel_dosimetry_validation/voi_mappings_config.json @@ -0,0 +1,64 @@ +{ + "_instructions": "Configure VOI/organ naming conventions, mappings, and visualization settings.", + + "valid_organ_names": { + "_description": "List of valid organ/VOI names for validation. These default names are compatible with OLINDA dosimetry calculations. Users can add custom organs as needed for their workflows.", + "names": [ + "Kidney_Left", + "Kidney_Right", + "Liver", + "Spleen", + "Bladder", + "SubmandibularGland_Left", + "SubmandibularGland_Right", + "ParotidGland_Left", + "ParotidGland_Right", + "BoneMarrow", + "Skeleton", + "WholeBody", + "RemainderOfBody", + "TotalTumorBurden" + ] + }, + + "canonical_mappings": { + "_description": "Best-effort ROI name normalization for auto_map mode. Maps abbreviated/common names to canonical organ names. Used when auto_map=True is set in create_studies_with_masks.", + "mappings": { + "LKidney": "Kidney_Left", + "RKidney": "Kidney_Right", + "Liver": "Liver", + "Spleen": "Spleen", + "WholeBody": "WholeBody", + "RemainderOfBody": "RemainderOfBody" + } + }, + + "ct_mappings": { + "LKidney": "Kidney_Left", + "RKidney": "Kidney_Right", + "Liver": "Liver", + "Spleen": "Spleen", + "WholeBody": "WholeBody", + "RemainderOfBody": "RemainderOfBody" + }, + + "spect_mappings": { + "LKidney": "Kidney_Left", + "RKidney": "Kidney_Right", + "Liver": "Liver", + "Spleen": "Spleen", + "WholeBody": "WholeBody", + "RemainderOfBody": "RemainderOfBody" + }, + + "plot_colors": { + "_description": "Color mapping for organ contours in MIP plots. Keys can be organ names or keywords (case-insensitive substring match). Any matplotlib color name or hex code is valid.", + "kidney": "lime", + "parotid": "red", + "submandibular": "red", + "lesion": "magenta", + "liver": "yellow", + "spleen": "cyan", + "tumor": "magenta" + } +} diff --git a/tests/test_dvk.py b/tests/test_dvk.py new file mode 100644 index 0000000..cd90dd2 --- /dev/null +++ b/tests/test_dvk.py @@ -0,0 +1,97 @@ +"""Tests for DoseVoxelKernel CSV loading and selection.""" + +from pathlib import Path + +import numpy as np +import pytest + +import pytheranostics.dosimetry.dvk as dvk +from pytheranostics.dosimetry.dvk import ( + DoseVoxelKernel, + KernelMetadata, + _build_full_kernel_from_octant, + _crop_kernel_around_center, + _select_closest_kernel, +) + + +def _write_kernel_csv(path: Path, octant: np.ndarray) -> None: + """Write a synthetic kernel octant using the Zenodo CSV layout.""" + path.parent.mkdir(parents=True, exist_ok=True) + + with path.open("w", encoding="utf-8", newline="") as file_obj: + for section_idx in range(octant.shape[2]): + block = octant[:, :, section_idx].T + for row in block: + file_obj.write(",".join(f"{value:.6f}" for value in row)) + file_obj.write("\n") + + if section_idx != octant.shape[2] - 1: + file_obj.write(",".join([""] * octant.shape[1])) + file_obj.write("\n") + + +def test_select_closest_kernel_warns_for_large_mismatch( + caplog: pytest.LogCaptureFixture, +): + """A warning should be emitted when the requested voxel size is far from a match.""" + kernels = [ + KernelMetadata(Path("a.csv"), "Lu177", 4.7952, 173), + KernelMetadata(Path("b.csv"), "Lu177", 10.0, 83), + ] + + with caplog.at_level("WARNING"): + selected = _select_closest_kernel(4.5, kernels) + + assert selected.voxel_size_mm == pytest.approx(4.7952) + assert "does not closely match" in caplog.text + + +def test_dose_voxel_kernel_reconstructs_octant_and_crops_by_default( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +): + """DoseVoxelKernel should rebuild the full kernel and crop it near 25 mm.""" + kernel_dir = tmp_path / "voxel_kernels" + octant = np.arange(1, 65, dtype=np.float64).reshape(4, 4, 4) + kernel_path = kernel_dir / "177Lu" / "177LU_6.647D_99%_10mm_7x7x7.csv" + _write_kernel_csv(kernel_path, octant) + + monkeypatch.setattr(dvk, "_DATA_DIR", kernel_dir) + + kernel = DoseVoxelKernel(isotope="Lu177", voxel_size_mm=10.0) + expected_full = _build_full_kernel_from_octant(octant) + expected_cropped = _crop_kernel_around_center(expected_full, 10.0, 25.0) + + assert kernel.kernel.shape == (3, 3, 3) + assert np.array_equal(kernel.kernel, expected_cropped) + assert kernel.kernel[1, 1, 1] == pytest.approx(octant[0, 0, 0]) + assert np.array_equal(kernel.kernel, np.flip(kernel.kernel, axis=0)) + assert np.array_equal(kernel.kernel, np.flip(kernel.kernel, axis=1)) + assert np.array_equal(kernel.kernel, np.flip(kernel.kernel, axis=2)) + + +def test_dose_voxel_kernel_downloads_when_no_csv_exists( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +): + """The loader should invoke the download helper when no CSV kernels are present.""" + kernel_dir = tmp_path / "voxel_kernels" + octant = np.arange(1, 9, dtype=np.float64).reshape(2, 2, 2) + created = {"called": False} + + def _fake_download(isotope: str, kernel_dir_arg: Path) -> None: + created["called"] = True + assert isotope == "Lu177" + _write_kernel_csv( + kernel_dir_arg / "177Lu" / "177LU_6.647D_99%_5mm_3x3x3.csv", + octant, + ) + + monkeypatch.setattr(dvk, "_DATA_DIR", kernel_dir) + monkeypatch.setattr(dvk, "_download_kernels_from_zenodo", _fake_download) + + kernel = DoseVoxelKernel( + isotope="Lu177", voxel_size_mm=5.0, crop_kernel_size_mm=None + ) + + assert created["called"] is True + assert kernel.kernel.shape == (3, 3, 3) diff --git a/tests/test_resource_loading.py b/tests/test_resource_loading.py index 57ec9f0..daa22a0 100644 --- a/tests/test_resource_loading.py +++ b/tests/test_resource_loading.py @@ -1,8 +1,5 @@ """Tests for data access via importlib.resources-based helpers.""" -import numpy as np - -from pytheranostics.dosimetry.dvk import DoseVoxelKernel from pytheranostics.dosimetry.olinda import load_phantom_mass @@ -10,10 +7,3 @@ def test_load_phantom_mass_returns_expected_value(): """The packaged phantom mass table should include standard organs.""" liver_mass = load_phantom_mass(gender="Male", organ="Liver") assert liver_mass == 1800 # matches bundled phantom data - - -def test_dose_voxel_kernel_falls_back_to_packaged_kernel(): - """DoseVoxelKernel should load the packaged default kernel if the exact voxel size is missing.""" - kernel = DoseVoxelKernel(isotope="Lu177", voxel_size_mm=5.5) - assert kernel.kernel.shape == (51, 51, 51) - assert kernel.kernel.dtype == np.float64 diff --git a/tests/test_voxel_dosimetry_validation.py b/tests/test_voxel_dosimetry_validation.py new file mode 100644 index 0000000..e421e2f --- /dev/null +++ b/tests/test_voxel_dosimetry_validation.py @@ -0,0 +1,281 @@ +"""Slow validation test for the tutorial-like voxel dosimetry pipeline.""" + +from __future__ import annotations + +import re +import shutil +from pathlib import Path +from typing import Final + +import numpy as np +import pandas as pd +import pytest +from pandas.testing import assert_frame_equal + +from pytheranostics import init_project +from pytheranostics.data_fetchers import fetch_snmmi_dosimetry_challenge +from pytheranostics.dosimetry import build_roi_fit_config +from pytheranostics.dosimetry.voxel_s_dosimetry import VoxelSDosimetry +from pytheranostics.imaging_ds import create_studies_with_masks +from pytheranostics.imaging_ds.dicom_ingest import auto_setup_dosimetry_study_inventory + +_VALIDATION_ASSETS: Final[Path] = ( + Path(__file__).resolve().parent / "data" / "voxel_dosimetry_validation" +) +_RTSTRUCTS_DIR: Final[Path] = _VALIDATION_ASSETS / "rtstructs" +_EXPECTED_RESULTS_CSV: Final[Path] = _VALIDATION_ASSETS / "expected_results.csv" +_EXPECTED_DF_AD_CSV: Final[Path] = _VALIDATION_ASSETS / "expected_df_ad.csv" +_OPTIONAL_CONFIG_FILES: Final[tuple[str, ...]] = ( + "voi_mappings_config.json", + "dosimetry_fit_defaults.json", +) +_LIST_LIKE_RESULTS_COLUMNS: Final[set[str]] = { + "Time_hr", + "Volume_CT_mL", + "Activity_MBq", + "Density_HU", + "Fit_params", + "R_squared_AIC", + "Lambda_eff", +} + + +def _skip_if_validation_assets_missing() -> None: + missing = [ + path.name + for path in (_RTSTRUCTS_DIR, _EXPECTED_RESULTS_CSV, _EXPECTED_DF_AD_CSV) + if not path.exists() + ] + if missing: + pytest.skip( + "Voxel dosimetry validation assets are not available. Missing: " + + ", ".join(missing) + + f". Populate {_VALIDATION_ASSETS} to enable this test." + ) + + +def _copy_optional_validation_configs(project_base: Path) -> None: + for config_name in _OPTIONAL_CONFIG_FILES: + src = _VALIDATION_ASSETS / config_name + if src.exists(): + shutil.copy2(src, project_base / config_name) + + +def _copy_rtstruct_assets(project_base: Path) -> None: + target_dir = project_base / "rtstructs" + shutil.copytree(_RTSTRUCTS_DIR, target_dir, dirs_exist_ok=True) + + +def _load_reference_frame(csv_path: Path) -> pd.DataFrame: + return pd.read_csv(csv_path, index_col=0) + + +def _parse_numeric_list_cell(value: object) -> object: + if not isinstance(value, str): + return value + + stripped = value.strip() + if not (stripped.startswith("[") and stripped.endswith("]")): + return value + + inner = stripped[1:-1].strip() + if not inner: + return [] + + cleaned = re.sub(r"np\.float64\(([^()]*)\)", r"\1", inner) + return [float(item.strip()) for item in cleaned.split(",")] + + +def _normalize_sequence_cell(value: object) -> object: + if isinstance(value, np.ndarray): + return [float(item) for item in value.tolist()] + if isinstance(value, (list, tuple)): + return [float(item) for item in value] + return value + + +def _normalize_frame_cells(frame: pd.DataFrame) -> pd.DataFrame: + normalized = frame.copy() + for column in normalized.columns: + if column in _LIST_LIKE_RESULTS_COLUMNS: + normalized[column] = normalized[column].map(_parse_numeric_list_cell) + normalized[column] = normalized[column].map(_normalize_sequence_cell) + return normalized + + +def _assert_sequence_columns_close( + actual: pd.DataFrame, expected: pd.DataFrame, *, rtol: float, atol: float +) -> None: + for column in expected.columns: + if column not in _LIST_LIKE_RESULTS_COLUMNS: + continue + for roi_name in expected.index: + expected_value = expected.at[roi_name, column] + actual_value = actual.at[roi_name, column] + + if isinstance(expected_value, list): + if not isinstance(actual_value, list): + raise AssertionError( + f"Column '{column}' for ROI '{roi_name}' is not list-like in actual results." + ) + if len(actual_value) != len(expected_value): + raise AssertionError( + f"Column '{column}' for ROI '{roi_name}' has different lengths: " + f"{len(actual_value)} != {len(expected_value)}" + ) + if not np.allclose(actual_value, expected_value, rtol=rtol, atol=atol): + raise AssertionError( + f"Column '{column}' for ROI '{roi_name}' differs. " + f"Actual={actual_value}, Expected={expected_value}" + ) + + +def _prepare_frame_for_comparison( + actual: pd.DataFrame, expected: pd.DataFrame +) -> tuple[pd.DataFrame, pd.DataFrame]: + missing_columns = [col for col in expected.columns if col not in actual.columns] + if missing_columns: + raise AssertionError( + "Actual dataframe is missing expected columns: " + + ", ".join(missing_columns) + ) + + actual_prepared = actual.loc[:, list(expected.columns)].copy() + expected_prepared = expected.copy() + + actual_prepared.index = actual_prepared.index.map(str) + expected_prepared.index = expected_prepared.index.map(str) + + missing_index = [ + idx for idx in expected_prepared.index if idx not in actual_prepared.index + ] + if missing_index: + raise AssertionError( + "Actual dataframe is missing expected index entries: " + + ", ".join(missing_index) + ) + + actual_prepared = actual_prepared.loc[list(expected_prepared.index)] + actual_prepared = _normalize_frame_cells(actual_prepared) + expected_prepared = _normalize_frame_cells(expected_prepared) + + for column in expected_prepared.columns: + if column in _LIST_LIKE_RESULTS_COLUMNS: + continue + if pd.api.types.is_numeric_dtype(expected_prepared[column]): + actual_prepared[column] = pd.to_numeric(actual_prepared[column]) + + actual_prepared = actual_prepared.sort_index() + expected_prepared = expected_prepared.sort_index() + return actual_prepared, expected_prepared + + +@pytest.mark.integration +@pytest.mark.slow +def test_voxel_dosimetry_pipeline_matches_reference(tmp_path: Path) -> None: + _skip_if_validation_assets_missing() + + project_base = tmp_path / "snmmi_dosimetry_validation_project" + init_project(project_base) + _copy_optional_validation_configs(project_base) + + try: + fetch_snmmi_dosimetry_challenge(data_home=str(project_base)) + except RuntimeError as exc: + pytest.skip(f"SNMMI dosimetry dataset could not be fetched: {exc}") + + _copy_rtstruct_assets(project_base) + + study_info, ct_paths, spect_paths, rtstruct_files = ( + auto_setup_dosimetry_study_inventory( + base_dir=project_base, + patient_id=None, + ) + ) + + assert study_info.get("patient_id") is not None + assert ct_paths, "No CT timepoints were discovered by the validation pipeline." + assert ( + spect_paths + ), "No SPECT timepoints were discovered by the validation pipeline." + assert ( + rtstruct_files + ), "No RTSTRUCT files were discovered by the validation pipeline." + + long_ct, long_spect, _, _ = create_studies_with_masks( + patient_id=study_info["patient_id"], + cycle_no=1, + parallel=True, + mapping_config=project_base / "voi_mappings_config.json", + study_info=study_info, + ) + + roi_config = build_roi_fit_config( + longSPECT=long_spect, + config_path=project_base / "dosimetry_fit_defaults.json", + ) + + first_tp_injection = study_info["time_points"][0]["injection_info"] + database_dir = project_base / "dosimetry_database" + results_dir = project_base / "results" + database_dir.mkdir(parents=True, exist_ok=True) + results_dir.mkdir(parents=True, exist_ok=True) + + dosimetry_config = { + "PatientID": study_info["patient_id"], + "Cycle": 1, + "DatabaseDir": str(database_dir), + "results_path": str(results_dir), + "VOIs": roi_config, + "InjectionDate": first_tp_injection["injection_date"], + "InjectionTime": first_tp_injection["injection_time"], + "InjectedActivity": long_spect.meta[0].Injected_Activity_MBq, + "Radionuclide": long_spect.meta[0].Radionuclide, + "PatientWeight_g": first_tp_injection["patient_weight_g"], + "Level": "Voxel", + "Method": "Voxel-S-value", + "ScaleDoseByDensity": False, + "ReferenceTimePoint": 0, + } + + dosimetry = VoxelSDosimetry( + config=dosimetry_config, + nm_data=long_spect, + ct_data=long_ct, + ) + dosimetry.compute_dose() + + expected_results = _load_reference_frame(_EXPECTED_RESULTS_CSV) + expected_df_ad = _load_reference_frame(_EXPECTED_DF_AD_CSV) + + actual_results, expected_results = _prepare_frame_for_comparison( + dosimetry.results, expected_results + ) + actual_df_ad, expected_df_ad = _prepare_frame_for_comparison( + dosimetry.df_ad, expected_df_ad + ) + + _assert_sequence_columns_close( + actual_results, + expected_results, + rtol=1e-4, + atol=1e-6, + ) + assert_frame_equal( + actual_results.drop( + columns=list(_LIST_LIKE_RESULTS_COLUMNS & set(actual_results.columns)) + ), + expected_results.drop( + columns=list(_LIST_LIKE_RESULTS_COLUMNS & set(expected_results.columns)) + ), + check_dtype=False, + rtol=1e-4, + atol=1e-6, + ) + assert_frame_equal( + actual_df_ad, + expected_df_ad, + check_dtype=False, + rtol=1e-4, + atol=1e-6, + )