Skip to content
Open
29 changes: 0 additions & 29 deletions DIMS/MakeInit.R

This file was deleted.

18 changes: 0 additions & 18 deletions DIMS/MakeInit.nf

This file was deleted.

19 changes: 19 additions & 0 deletions DIMS/ParseSamplesheet.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Parse the samplesheet and derive the replication pattern.
#
# Command-line arguments:
#   1. path to the tab-separated samplesheet
#   2. directory that contains parse_samplesheet_functions.R
#
# Output (written to the current working directory):
#   - replication_pattern.txt: printed replication pattern, for troubleshooting
#   - init.RData: the `repl_pattern` object

# define parameters
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 2) {
  stop(
    "Usage: Rscript ParseSamplesheet.R <samplesheet> <preprocessing_scripts_dir>",
    call. = FALSE
  )
}

samplesheet_path <- args[1]
preprocessing_scripts_dir <- args[2]

# read.delim() is read.csv() with sep = "\t"; check.names = FALSE keeps headers
# such as "File Name" exactly as written instead of rewriting them to
# "File.Name", so the column lookup sees the literal header text
sample_sheet <- read.delim(samplesheet_path, check.names = FALSE)

# load in function script; file.path() inserts the separator, so the directory
# may be given with or without a trailing slash
source(file.path(preprocessing_scripts_dir, "parse_samplesheet_functions.R"))

# generate the replication pattern
repl_pattern <- generate_repl_pattern(sample_sheet)

# write the replication pattern to text file for troubleshooting purposes;
# capture.output() restores the output stream even if printing fails
capture.output(print(repl_pattern), file = "replication_pattern.txt")

# save replication pattern to file
save(repl_pattern, file = "init.RData")
17 changes: 17 additions & 0 deletions DIMS/ParseSamplesheet.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// Runs ParseSamplesheet.R on a samplesheet inside the DIMS container.
//
// Input:
//   samplesheet - path to the samplesheet file, passed to the R script as its
//                 first argument
// Output:
//   init.RData               - emitted on the `rdata_file` channel
//   replication_pattern.txt  - emitted as an unnamed second output
// Both output files are expected to be created in the task work directory by
// the R script.
process ParseSamplesheet {
tag "DIMS ParseSamplesheet"
label 'ParseSamplesheet'
container = 'docker://umcugenbioinf/dims:1.3'

input:
path(samplesheet)

output:
path('init.RData'), emit: rdata_file
path('replication_pattern.txt')

// The second argument is the directory from which the R script sources its
// helper functions (params.preprocessing_scripts_dir).
script:
"""
Rscript ${baseDir}/CustomModules/DIMS/ParseSamplesheet.R $samplesheet $params.preprocessing_scripts_dir
"""
}
30 changes: 30 additions & 0 deletions DIMS/preprocessing/parse_samplesheet_functions.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# function for parse_samplesheet

#' Generate replication pattern list based on information in sample_sheet
#'
#' Groups the file names in the samplesheet by sample name. Before grouping,
#' sample names are stripped of leading/trailing whitespace and every character
#' that is not a letter, number, hyphen or period is replaced by an underscore,
#' so that e.g. "P1" and "P1 " are treated as the same sample.
#'
#' @param sample_sheet data frame or character matrix with one file name column
#'   (header "File_Name", "File Name" or "File.Name") and one sample name
#'   column ("Sample_Name", "Sample Name" or "Sample.Name"); header matching is
#'   case-insensitive.
#'
#' @return repl_pattern: named list with one element per cleaned sample name,
#'   each holding the file names (technical replicates) of that sample. Rows
#'   with a missing sample name are dropped.
generate_repl_pattern <- function(sample_sheet) {
  # `[[` column extraction below needs a data frame; this is a no-op for one
  sample_sheet <- as.data.frame(sample_sheet, stringsAsFactors = FALSE)

  # locate exactly one column whose header matches `pattern`, or fail clearly
  find_column <- function(pattern, label) {
    hits <- grep(pattern, colnames(sample_sheet), ignore.case = TRUE)
    if (length(hits) != 1L) {
      stop(
        "Expected exactly one ", label, " column in the samplesheet, found ",
        length(hits), ".",
        call. = FALSE
      )
    }
    hits
  }

  # get the file name and sample name columns from the samplesheet; the "."
  # variant covers headers normalised by read.csv(check.names = TRUE)
  file_name_col <- find_column("File[_ .]Name", "file name")
  sample_name_col <- find_column("Sample[_ .]Name", "sample name")

  # clean the sample names: trim whitespace, then replace all characters which
  # are not letters, numbers, hyphens and periods
  sample_names <- trimws(as.character(sample_sheet[[sample_name_col]]))
  sample_names <- gsub("[^-.[:alnum:]]", "_", sample_names)

  # create replication pattern (which technical replicates belong to which
  # sample), grouping on the cleaned names rather than the raw column
  repl_pattern <- split(sample_sheet[[file_name_col]], sample_names)

  return(repl_pattern)
}

24 changes: 24 additions & 0 deletions DIMS/tests/testthat/parse_samplesheet_functions.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# unit tests for ParseSamplesheet
# function: generate_repl_pattern

# source all functions for ParseSamplesheet
source("../../preprocessing/parse_samplesheet_functions.R")

# test generate_repl_pattern
testthat::test_that("replication pattern is correctly generated", {
  # create sample sheet to test on: six files, two technical replicates each
  # for samples C1, P2 and P3
  test_file_names <- paste0("RES_20260101_", sprintf("%03d", 1:6))
  test_sample_names <- rep(c("C1", "P2", "P3"), each = 2)
  test_sample_sheet <- data.frame(
    File_Name = test_file_names,
    Sample_Name = test_sample_names,
    stringsAsFactors = FALSE
  )

  repl_pattern <- generate_repl_pattern(test_sample_sheet)

  # test that a list of length 3 is generated
  testthat::expect_length(repl_pattern, 3)
  # test list names
  testthat::expect_equal(names(repl_pattern), unique(test_sample_names))
  # test that each sample gets its own two files
  testthat::expect_equal(repl_pattern[["C1"]], test_file_names[1:2])
  testthat::expect_equal(repl_pattern[["P3"]], test_file_names[5:6])

  # test what happens if a sample name is used for more than two files:
  # all files with that name must end up in the same list element
  merged_sample_names <- gsub("P3", "P2", test_sample_names)
  merged_sample_sheet <- data.frame(
    File_Name = test_file_names,
    Sample_Name = merged_sample_names,
    stringsAsFactors = FALSE
  )
  merged_pattern <- generate_repl_pattern(merged_sample_sheet)

  testthat::expect_length(merged_pattern, 2)
  testthat::expect_length(merged_pattern[["P2"]], 4)
  testthat::expect_equal(merged_pattern[["P2"]], test_file_names[3:6])
})
Loading