From a418d70c486db602867dc3f42cdaf4ad61dca348 Mon Sep 17 00:00:00 2001 From: ssjunnebo Date: Thu, 19 Mar 2026 07:49:18 +0100 Subject: [PATCH 1/8] remove side letter from FCIDs to match what's scanned in the lab --- dataflow_transfer/run_classes/element_runs.py | 7 ++++--- dataflow_transfer/run_classes/illumina_runs.py | 5 ++++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/dataflow_transfer/run_classes/element_runs.py b/dataflow_transfer/run_classes/element_runs.py index a3ea109..2a208b4 100644 --- a/dataflow_transfer/run_classes/element_runs.py +++ b/dataflow_transfer/run_classes/element_runs.py @@ -9,9 +9,6 @@ class ElementRun(Run): def __init__(self, run_dir, configuration): super().__init__(run_dir, configuration) self.final_file = "RunUploaded.json" - self.flowcell_id = self.run_id.split("_")[ - -1 - ] # This is true for all except Teton runs @register_run_class @@ -24,4 +21,8 @@ def __init__(self, run_dir, configuration): self.run_id_format = ( r"^\d{8}_AV\d{6}_(A|BP)\d{10}$" # 20251007_AV242106_A2507535225 ) + self.flowcell_id = self.run_id.split("_")[-1][1:] # 2507535225 super().__init__(run_dir, configuration) + + +# TODO: Add Teton run class diff --git a/dataflow_transfer/run_classes/illumina_runs.py b/dataflow_transfer/run_classes/illumina_runs.py index 7eab9bb..6c5ac11 100644 --- a/dataflow_transfer/run_classes/illumina_runs.py +++ b/dataflow_transfer/run_classes/illumina_runs.py @@ -9,7 +9,6 @@ class IlluminaRun(Run): def __init__(self, run_dir, configuration): super().__init__(run_dir, configuration) self.final_file = "CopyComplete.txt" - self.flowcell_id = self.run_id.split("_")[-1] @register_run_class @@ -22,6 +21,7 @@ def __init__(self, run_dir, configuration): self.run_id_format = ( r"^\d{8}_[A-Z0-9]+_\d{4}_[A-Z0-9]+$" # 20251010_LH00202_0284_B22CVHTLT1 ) + self.flowcell_id = self.run_id.split("_")[-1][1:] # 22CVHTLT1 super().__init__(run_dir, configuration) @@ -35,6 +35,7 @@ def __init__(self, run_dir, configuration): self.run_id_format = ( r"^\d{6}_[A-Z0-9]+_\d{3}_[A-Z0-9]+$" # 251015_VH00203_572_AAHFHCCM5 ) + self.flowcell_id = self.run_id.split("_")[-1] # AAHFHCCM5 super().__init__(run_dir, configuration) @@ -48,6 +49,7 @@ def __init__(self, run_dir, configuration): self.run_id_format = ( r"^\d{6}_[A-Z0-9]+_\d{4}_[A-Z0-9\-]+$" # 251015_M01548_0646_000000000-M6D7K ) + self.flowcell_id = self.run_id.split("_")[-1] # 000000000-M6D7K super().__init__(run_dir, configuration) @@ -59,4 +61,5 @@ class MiSeqi100Run(IlluminaRun): def __init__(self, run_dir, configuration): self.run_id_format = r"^\d{8}_[A-Z0-9]+_\d{4}_[A-Z0-9]{10}-SC3$" # 20260128_SH01140_0002_ASC2150561-SC3 + self.flowcell_id = self.run_id.split("_")[-1][1:] # SC2150561-SC3 super().__init__(run_dir, configuration) From 65dc93e48d3b02ec269426210e06c523bf0568cc Mon Sep 17 00:00:00 2001 From: ssjunnebo Date: Thu, 19 Mar 2026 08:10:14 +0100 Subject: [PATCH 2/8] Set flowcell_id after init --- dataflow_transfer/run_classes/element_runs.py | 2 +- dataflow_transfer/run_classes/illumina_runs.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/dataflow_transfer/run_classes/element_runs.py b/dataflow_transfer/run_classes/element_runs.py index 2a208b4..6dc3da8 100644 --- a/dataflow_transfer/run_classes/element_runs.py +++ b/dataflow_transfer/run_classes/element_runs.py @@ -21,8 +21,8 @@ def __init__(self, run_dir, configuration): self.run_id_format = ( r"^\d{8}_AV\d{6}_(A|BP)\d{10}$" # 20251007_AV242106_A2507535225 ) - self.flowcell_id = self.run_id.split("_")[-1][1:] # 2507535225 super().__init__(run_dir, configuration) + self.flowcell_id = self.run_id.split("_")[-1][1:] # 2507535225 # TODO: Add Teton run class diff --git a/dataflow_transfer/run_classes/illumina_runs.py b/dataflow_transfer/run_classes/illumina_runs.py index 6c5ac11..3c99cac 100644 --- a/dataflow_transfer/run_classes/illumina_runs.py +++ b/dataflow_transfer/run_classes/illumina_runs.py @@ -21,8 +21,8 @@ def __init__(self, run_dir, configuration): self.run_id_format = ( r"^\d{8}_[A-Z0-9]+_\d{4}_[A-Z0-9]+$" # 20251010_LH00202_0284_B22CVHTLT1 ) - self.flowcell_id = self.run_id.split("_")[-1][1:] # 22CVHTLT1 super().__init__(run_dir, configuration) + self.flowcell_id = self.run_id.split("_")[-1][1:] # 22CVHTLT1 @register_run_class @@ -35,8 +35,8 @@ def __init__(self, run_dir, configuration): self.run_id_format = ( r"^\d{6}_[A-Z0-9]+_\d{3}_[A-Z0-9]+$" # 251015_VH00203_572_AAHFHCCM5 ) - self.flowcell_id = self.run_id.split("_")[-1] # AAHFHCCM5 super().__init__(run_dir, configuration) + self.flowcell_id = self.run_id.split("_")[-1] # AAHFHCCM5 @register_run_class @@ -49,8 +49,8 @@ def __init__(self, run_dir, configuration): self.run_id_format = ( r"^\d{6}_[A-Z0-9]+_\d{4}_[A-Z0-9\-]+$" # 251015_M01548_0646_000000000-M6D7K ) - self.flowcell_id = self.run_id.split("_")[-1] # 000000000-M6D7K super().__init__(run_dir, configuration) + self.flowcell_id = self.run_id.split("_")[-1] # 000000000-M6D7K @register_run_class @@ -61,5 +61,5 @@ class MiSeqi100Run(IlluminaRun): def __init__(self, run_dir, configuration): self.run_id_format = r"^\d{8}_[A-Z0-9]+_\d{4}_[A-Z0-9]{10}-SC3$" # 20260128_SH01140_0002_ASC2150561-SC3 - self.flowcell_id = self.run_id.split("_")[-1][1:] # SC2150561-SC3 super().__init__(run_dir, configuration) + self.flowcell_id = self.run_id.split("_")[-1][1:] # SC2150561-SC3 From 71307a2161c233742bdf673678d02b04763d8593 Mon Sep 17 00:00:00 2001 From: ssjunnebo Date: Thu, 19 Mar 2026 08:10:37 +0100 Subject: [PATCH 3/8] Add test to make sure the fc id is what's expected --- dataflow_transfer/tests/test_run_classes.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/dataflow_transfer/tests/test_run_classes.py b/dataflow_transfer/tests/test_run_classes.py index 58238cb..7f2c36b 100644 --- a/dataflow_transfer/tests/test_run_classes.py +++ b/dataflow_transfer/tests/test_run_classes.py @@ -169,6 +169,20 @@ def test_confirm_run_type(run_fixture, expected_run_type, request): run_obj.confirm_run_type() +@pytest.mark.parametrize( + "run_fixture, expected_flowcell", + [ + ("novaseqxplus_testobj", "22CVHTLT1"), + ("nextseq_testobj", "AAHFHCCM5"), + ("miseqseq_testobj", "000000000-M6D7K"), + ("miseqseqi100_testobj", "SC2150561-SC3"), + ], +) +def test_flowcell_id_is_computed(run_fixture, expected_flowcell, request): + run_obj = request.getfixturevalue(run_fixture) + assert run_obj.flowcell_id == expected_flowcell + + @pytest.mark.parametrize( "run_fixture", [ From 56f16fcbbfdd50a66cd46d4145d9799320e25feb Mon Sep 17 00:00:00 2001 From: ssjunnebo Date: Thu, 19 Mar 2026 08:23:38 +0100 Subject: [PATCH 4/8] Set path to metadata for each sequencer in config --- dataflow_transfer/run_classes/generic_runs.py | 3 +-- dataflow_transfer/tests/test_run_classes.py | 8 ++++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/dataflow_transfer/run_classes/generic_runs.py b/dataflow_transfer/run_classes/generic_runs.py index daf88dc..351bc87 100644 --- a/dataflow_transfer/run_classes/generic_runs.py +++ b/dataflow_transfer/run_classes/generic_runs.py @@ -25,8 +25,7 @@ def __init__(self, run_dir, configuration): self.run_dir, ".metadata_rsync_exitcode" ) self.metadata_destination = os.path.join( - self.configuration.get("metadata_archive"), - getattr(self, "run_type", None), + self.sequencer_config.get("metadata_archive"), self.run_id, ) self.final_rsync_exitcode_file = os.path.join( diff --git a/dataflow_transfer/tests/test_run_classes.py b/dataflow_transfer/tests/test_run_classes.py index 7f2c36b..e9bd993 100644 --- a/dataflow_transfer/tests/test_run_classes.py +++ b/dataflow_transfer/tests/test_run_classes.py @@ -12,7 +12,6 @@ def novaseqxplus_testobj(tmp_path): config = { "log": {"file": "test.log"}, "transfer_details": {"user": "testuser", "host": "testhost"}, - "metadata_archive": "/data/metadata_archive", "statusdb": { "username": "dbuser", "password": "dbpass", @@ -23,6 +22,7 @@ def novaseqxplus_testobj(tmp_path): "NovaSeqXPlus": { "remote_destination": "/data/NovaSeqXPlus", "metadata_for_statusdb": ["RunInfo.xml", "RunParameters.xml"], + "metadata_archive": "/data/metadata_archive/NovaSeqXPlus", "ignore_folders": ["nosync"], "remote_rsync_options": ["--chmod=Dg+s,g+rw"], "metadata_rsync_options": [ @@ -44,7 +44,6 @@ def nextseq_testobj(tmp_path): config = { "log": {"file": "test.log"}, "transfer_details": {"user": "testuser", "host": "testhost"}, - "metadata_archive": "/data/metadata_archive", "statusdb": { "username": "dbuser", "password": "dbpass", @@ -55,6 +54,7 @@ def nextseq_testobj(tmp_path): "NextSeq": { "remote_destination": "/data/NextSeq", "metadata_for_statusdb": ["RunInfo.xml", "RunParameters.xml"], + "metadata_archive": "/data/metadata_archive/NextSeq", "ignore_folders": ["nosync"], "remote_rsync_options": ["--chmod=Dg+s,g+rw"], "metadata_rsync_options": [ @@ -76,7 +76,6 @@ def miseqseq_testobj(tmp_path): config = { "log": {"file": "test.log"}, "transfer_details": {"user": "testuser", "host": "testhost"}, - "metadata_archive": "/data/metadata_archive", "statusdb": { "username": "dbuser", "password": "dbpass", @@ -87,6 +86,7 @@ def miseqseq_testobj(tmp_path): "MiSeq": { "remote_destination": "/data/MiSeq", "metadata_for_statusdb": ["RunInfo.xml", "RunParameters.xml"], + "metadata_archive": "/data/metadata_archive/MiSeq", "ignore_folders": ["nosync"], "remote_rsync_options": ["--chmod=Dg+s,g+rw"], "metadata_rsync_options": [ @@ -108,7 +108,6 @@ def miseqseqi100_testobj(tmp_path): config = { "log": {"file": "test.log"}, "transfer_details": {"user": "testuser", "host": "testhost"}, - "metadata_archive": "/data/metadata_archive", "statusdb": { "username": "dbuser", "password": "dbpass", @@ -119,6 +118,7 @@ def miseqseqi100_testobj(tmp_path): "MiSeqi100": { "remote_destination": "/data/MiSeqi100", "metadata_for_statusdb": ["RunInfo.xml", "RunParameters.xml"], + "metadata_archive": "/data/metadata_archive/MiSeqi100", "ignore_folders": ["nosync"], "remote_rsync_options": ["--chmod=Dg+s,g+rw"], "metadata_rsync_options": [ From 92f5f4aaef4b6a2f6af872af3f97f2736572b954 Mon Sep 17 00:00:00 2001 From: ssjunnebo Date: Thu, 19 Mar 2026 08:30:10 +0100 Subject: [PATCH 5/8] Documentation --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 56af976..5797632 100644 --- a/README.md +++ b/README.md @@ -83,8 +83,6 @@ log: run_one_path: /usr/bin/run-one -metadata_archive: /path/to/metadata/archive - transfer_details: user: username host: remote.host.com @@ -99,6 +97,7 @@ sequencers: NovaSeqXPlus: sequencing_path: /sequencing/NovaSeqXPlus remote_destination: /Illumina/NovaSeqXPlus + metadata_archive: /path/to/metadata/archive/NovaSeqXPlus_data metadata_for_statusdb: - RunInfo.xml - RunParameters.xml @@ -150,6 +149,7 @@ Run status is tracked in CouchDB with events including: - Remote storage is accessible via rsync over SSH - CouchDB is accessible and the database exists - Metadata files (e.g., RunInfo.xml) are present in run directories for status database updates and sync to metadata archive location +- The flowcell ID is set to correspond to the ID that is scanned with a barcode scanner during sequencing setup in the lab ### Status Files From 40a599e27da5e68c6efb0f71e206ee51d3df39d6 Mon Sep 17 00:00:00 2001 From: ssjunnebo Date: Thu, 19 Mar 2026 08:30:35 +0100 Subject: [PATCH 6/8] Versioning --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 675894c..c8c4f30 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,7 @@ ignore = [ [project] name = "dataflow_transfer" -version = "1.1.1" +version = "1.1.2" description = "Script for transferring sequencing data from sequencers to storage" authors = [ { name = "Sara Sjunnebo", email = "sara.sjunnebo@scilifelab.se" }, From 89b82b55a58c399c2e357289cc7c32fbfd2fc663 Mon Sep 17 00:00:00 2001 From: ssjunnebo Date: Thu, 19 Mar 2026 13:50:59 +0100 Subject: [PATCH 7/8] Fix typo in Aviti run id regex --- dataflow_transfer/run_classes/element_runs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dataflow_transfer/run_classes/element_runs.py b/dataflow_transfer/run_classes/element_runs.py index 6dc3da8..2b543e2 100644 --- a/dataflow_transfer/run_classes/element_runs.py +++ b/dataflow_transfer/run_classes/element_runs.py @@ -19,7 +19,7 @@ class AVITIRun(ElementRun): def __init__(self, run_dir, configuration): self.run_id_format = ( - r"^\d{8}_AV\d{6}_(A|BP)\d{10}$" # 20251007_AV242106_A2507535225 + r"^\d{8}_AV\d{6}_(A|B)\d{10}$" # 20251007_AV242106_A2507535225 ) super().__init__(run_dir, configuration) self.flowcell_id = self.run_id.split("_")[-1][1:] # 2507535225 From 72c6c60855685fa164b1dd9f65a64ec8e0f1343e Mon Sep 17 00:00:00 2001 From: ssjunnebo Date: Thu, 19 Mar 2026 13:54:30 +0100 Subject: [PATCH 8/8] Set default FCID in IlluminaRun --- dataflow_transfer/run_classes/illumina_runs.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dataflow_transfer/run_classes/illumina_runs.py b/dataflow_transfer/run_classes/illumina_runs.py index 3c99cac..12cfa11 100644 --- a/dataflow_transfer/run_classes/illumina_runs.py +++ b/dataflow_transfer/run_classes/illumina_runs.py @@ -9,6 +9,7 @@ class IlluminaRun(Run): def __init__(self, run_dir, configuration): super().__init__(run_dir, configuration) self.final_file = "CopyComplete.txt" + self.flowcell_id = self.run_id.split("_")[-1] @register_run_class @@ -36,7 +37,6 @@ def __init__(self, run_dir, configuration): r"^\d{6}_[A-Z0-9]+_\d{3}_[A-Z0-9]+$" # 251015_VH00203_572_AAHFHCCM5 ) super().__init__(run_dir, configuration) - self.flowcell_id = self.run_id.split("_")[-1] # AAHFHCCM5 @register_run_class @@ -50,7 +50,6 @@ def __init__(self, run_dir, configuration): r"^\d{6}_[A-Z0-9]+_\d{4}_[A-Z0-9\-]+$" # 251015_M01548_0646_000000000-M6D7K ) super().__init__(run_dir, configuration) - self.flowcell_id = self.run_id.split("_")[-1] # 000000000-M6D7K @register_run_class