snakemake-workflows · AntonieV · Jun 9, 2021 · Jun 15, 2021 · Jun 16, 2021 · Jun 23, 2021
diff --git a/.test/config/config.yaml b/.test/config/config.yaml
@@ -1,6 +1,6 @@
-# This file should contain everything to configure the workflow on a global scale.
+# This file contains everything to configure the workflow on a global scale.
 # In case of sample based data, it should be complemented by a samples.tsv file that contains
-# one row per sample. It can be parsed easily via pandas.
+# one row per sample. It is parsed in common.smk using pandas (https://pandas.pydata.org/).
 samples: "config/samples.tsv"
 # to download reads from SRA the accession numbers (see https://www.ncbi.nlm.nih.gov/sra) of samples must be given in
 # units.tsv dataset for testing this workflow with single end reads:
@@ -20,9 +20,9 @@ resources:
     release: 101
     # Genome build
     build: GRCh38
-    # for testing data a specific chromosome can be selected
+    # for testing data a single chromosome can be selected (leave empty for a regular analysis)
     chromosome:
-    # specify release version number of igenomes list to use (see https://github.com/nf-core/chipseq/releases), default: 1.2.2
+    # specify release version number of igenomes list to use (see https://github.com/nf-core/chipseq/releases), e.g. 1.2.2
     igenomes_release: 1.2.2
     # if igenomes.yaml cannot be used, a value for the mappable or effective genome size can be specified here, e.g. macs-gsize: 2.7e9
     macs-gsize:
@@ -45,14 +45,29 @@ params:
   picard_metrics:
     activate: True
   deseq2:
-    # optional to run vst transform instead of rlog
+    # set to True to use the vst transformation instead of the rlog transformation for the DESeq2 analysis
     vst: True
   peak-annotation-analysis:
     activate: True
   peak-qc:
     activate: True
   consensus-peak-analysis:
     activate: True
+  # samtools view parameter suggestions (for full parameters, see: https://www.htslib.org/doc/samtools-view.html):
+  # if duplicates should be removed in this filtering, add "-F 0x0400" to the params
+  # if for each read, you only want to retain a single (best) mapping, add "-q 1" to params
+  # the analysis is automatically restricted to certain regions (e.g. excluding "blacklisted" regions):
+  # the -L option is activated if a path to a blacklist of the given genome exists in the
+  # downloaded "resources/ref/igenomes.yaml" or has been provided via the parameter
+  # "config['resources']['ref']['blacklist']" in this configuration file
+  samtools-view-se: "-b -F 0x004"
+  samtools-view-pe: "-b -F 0x004 -G 0x009 -f 0x001"
+  plotfingerprint:
+    # --numberOfSamples parameter of deeptools plotFingerprint, see: https://deeptools.readthedocs.io/en/develop/content/tools/plotFingerprint.html#Optional%20arguments
+    number-of-samples: 500000
+  # optional parameters for picard's CollectMultipleMetrics, which is run on the sorted, filtered and merged bam files in the post-analysis step
+  # see https://gatk.broadinstitute.org/hc/en-us/articles/360037594031-CollectMultipleMetrics-Picard-
+  collect-multiple-metrics: VALIDATION_STRINGENCY=LENIENT
   # TODO: move adapter parameters into a `adapter` column in units.tsv and check for its presence via the units.schema.yaml -- this enables unit-specific adapters, e.g. when integrating multiple datasets
   # these cutadapt parameters need to contain the required flag(s) for
   # the type of adapter(s) to trim, i.e.:

diff --git a/.test/config/samples.tsv b/.test/config/samples.tsv
@@ -6,36 +6,35 @@ D	E2	batch2	AJ	ERa
 E	TNFa	batch1	AK	ERa
 F	TNFa	batch2	AL	ERa
 G	E2_TNFa	batch1	AM	ERa
-H	E2_TNFa	batch2	AN	ERa
-I	Veh	batch1	AG	p65
-J	Veh	batch2	AH	p65
-K	E2	batch1	AI	p65
-L	E2	batch2	AJ	p65
-M	TNFa	batch1	AK	p65
-N	TNFa	batch2	AL	p65
-O	E2_TNFa	batch1	AM	p65
-P	E2_TNFa	batch2	AN	p65
-Q	Veh	batch1	AG	FoxA1
-R	Veh	batch2	AH	FoxA1
-S	E2	batch1	AI	FoxA1
-T	E2	batch2	AJ	FoxA1
-U	TNFa	batch1	AK	FoxA1
-V	TNFa	batch2	AL	FoxA1
-W	E2_TNFa	batch1	AM	FoxA1
-X	E2_TNFa	batch2	AM	FoxA1
+H	Veh	batch1	AG	p65
+I	Veh	batch2	AH	p65
+J	E2	batch1	AI	p65
+K	E2	batch2	AJ	p65
+L	TNFa	batch1	AK	p65
+M	TNFa	batch2	AL	p65
+N	E2_TNFa	batch1	AM	p65
+O	E2_TNFa	batch2	AN	p65
+P	Veh	batch1	AG	FoxA1
+Q	Veh	batch2	AH	FoxA1
+R	E2	batch1	AI	FoxA1
+S	E2	batch2	AJ	FoxA1
+T	TNFa	batch1	AK	FoxA1
+U	TNFa	batch2	AL	FoxA1
+V	E2_TNFa	batch1	AM	FoxA1
+W	E2_TNFa	batch2	AM	FoxA1
+X	E2_TNFa	batch1	AM	ERa
 Y	E2_TNFa	batch1	AM	ERa
 Z	E2_TNFa	batch1	AM	ERa
 AA	E2_TNFa	batch1	AM	ERa
-AB	E2_TNFa	batch1	AM	ERa
+AB	E2_TNFa	batch2	AN	ERa
 AC	E2_TNFa	batch2	AN	ERa
 AD	E2_TNFa	batch2	AN	ERa
 AE	E2_TNFa	batch2	AN	ERa
-AF	E2_TNFa	batch2	AN	ERa
-AG	Veh	batch1
-AH	Veh	batch2
-AI	E2	batch1
-AJ	E2	batch2
-AK	TNFa	batch1
-AL	TNFa	batch2
-AM	E2_TNFa	batch1
-AN	E2_TNFa	batch2
+AF	Veh	batch1
+AG	Veh	batch2
+AH	E2	batch1
+AI	E2	batch2
+AJ	TNFa	batch1
+AK	TNFa	batch2
+AL	E2_TNFa	batch1
+AM	E2_TNFa	batch2
diff --git a/.test/config/units.tsv b/.test/config/units.tsv
@@ -1,41 +1,41 @@
-sample	unit	fragment_len_mean	fragment_len_sd	fq1	fq2	sra_accession	platform
-A	1					SRR1635443	ILLUMINA
-B	1					SRR1635444	ILLUMINA
-C	1	300	14			SRR1635445	ILLUMINA
-D	1					SRR1635446	ILLUMINA
-E	1					SRR1635447	ILLUMINA
-F	1					SRR1635448	ILLUMINA
-G	1					SRR1635449	ILLUMINA
-H	2					SRR1635450	ILLUMINA
-I	1					SRR1635451	ILLUMINA
-J	2					SRR1635452	ILLUMINA
-K	1					SRR1635453	ILLUMINA
-L	2					SRR1635454	ILLUMINA
-M	1					SRR1635455	ILLUMINA
-N	2					SRR1635456	ILLUMINA
-O	1					SRR1635457	ILLUMINA
-P	2					SRR1635458	ILLUMINA
-Q	1					SRR1635459	ILLUMINA
-R	2					SRR1635460	ILLUMINA
-S	1					SRR1635461	ILLUMINA
-T	2					SRR1635462	ILLUMINA
-U	1					SRR1635463	ILLUMINA
-V	2					SRR1635464	ILLUMINA
-W	1					SRR1635465	ILLUMINA
-X	2					SRR1635466	ILLUMINA
-Y	1					SRR1635467	ILLUMINA
-Z	2					SRR1635468	ILLUMINA
-AA	1					SRR1635469	ILLUMINA
-AB	2					SRR1635470	ILLUMINA
-AC	1					SRR1635471	ILLUMINA
-AD	2					SRR1635472	ILLUMINA
-AE	1					SRR1635473	ILLUMINA
-AF	2					SRR1635474	ILLUMINA
-AG	1					SRR1635435	ILLUMINA
-AH	2					SRR1635436	ILLUMINA
-AI	1					SRR1635437	ILLUMINA
-AJ	2					SRR1635438	ILLUMINA
-AK	1					SRR1635439	ILLUMINA
-AL	2					SRR1635440	ILLUMINA
-AM	1					SRR1635441	ILLUMINA
-AN	2					SRR1635442	ILLUMINA
+sample	unit	fq1	fq2	sra_accession	platform
+A	1			SRR1635443	ILLUMINA
+B	1			SRR1635444	ILLUMINA
+C	1			SRR1635445	ILLUMINA
+D	1			SRR1635446	ILLUMINA
+E	1			SRR1635447	ILLUMINA
+F	1			SRR1635448	ILLUMINA
+G	1			SRR1635449	ILLUMINA
+G	2			SRR1635450	ILLUMINA
+H	1			SRR1635451	ILLUMINA
+I	1			SRR1635452	ILLUMINA
+J	1			SRR1635453	ILLUMINA
+K	1			SRR1635454	ILLUMINA
+L	1			SRR1635455	ILLUMINA
+M	1			SRR1635456	ILLUMINA
+N	1			SRR1635457	ILLUMINA
+O	1			SRR1635458	ILLUMINA
+P	1			SRR1635459	ILLUMINA
+Q	1			SRR1635460	ILLUMINA
+R	1			SRR1635461	ILLUMINA
+S	1			SRR1635462	ILLUMINA
+T	1			SRR1635463	ILLUMINA
+U	1			SRR1635464	ILLUMINA
+V	1			SRR1635465	ILLUMINA
+W	1			SRR1635466	ILLUMINA
+X	1			SRR1635467	ILLUMINA
+Y	1			SRR1635468	ILLUMINA
+Z	1			SRR1635469	ILLUMINA
+AA	1			SRR1635470	ILLUMINA
+AB	1			SRR1635471	ILLUMINA
+AC	1			SRR1635472	ILLUMINA
+AD	1			SRR1635473	ILLUMINA
+AE	1			SRR1635474	ILLUMINA
+AF	1			SRR1635435	ILLUMINA
+AG	1			SRR1635436	ILLUMINA
+AH	1			SRR1635437	ILLUMINA
+AI	1			SRR1635438	ILLUMINA
+AJ	1			SRR1635439	ILLUMINA
+AK	1			SRR1635440	ILLUMINA
+AL	1			SRR1635441	ILLUMINA
+AM	1			SRR1635442	ILLUMINA
diff --git a/.test/config_paired_end/config.yaml b/.test/config_paired_end/config.yaml
@@ -1,6 +1,6 @@
-# This file should contain everything to configure the workflow on a global scale.
+# This file contains everything to configure the workflow on a global scale.
 # In case of sample based data, it should be complemented by a samples.tsv file that contains
-# one row per sample. It can be parsed easily via pandas.
+# one row per sample. It is parsed in common.smk using pandas (https://pandas.pydata.org/).
 samples: "config_paired_end_reduced/samples.tsv"
 units: "config_paired_end_reduced/units.tsv"
 single_end: False
@@ -17,9 +17,9 @@ resources:
     release: 101
     # Genome build
     build: R64-1-1
-    # for testing data only chromosome 21 is selected
+    # for testing data a single chromosome can be selected (leave empty for a regular analysis)
     chromosome:
-    # specify release version number of igenomes list to use (see https://github.com/nf-core/chipseq/releases), default: 1.2.2
+    # specify release version number of igenomes list to use (see https://github.com/nf-core/chipseq/releases), e.g. 1.2.2
     igenomes_release: 1.2.2
     # if igenomes.yaml cannot be used, a value for the mappable or effective genome size can be specified here, e.g. macs-gsize: 2.7e9
     macs-gsize:
@@ -42,14 +42,29 @@ params:
   picard_metrics:
     activate: True
   deseq2:
-    # optional to run vst transform instead of rlog
+    # set to True to use the vst transformation instead of the rlog transformation for the DESeq2 analysis
     vst: True
   peak-annotation-analysis:
     activate: True
   peak-qc:
     activate: True
   consensus-peak-analysis:
     activate: True
+  # samtools view parameter suggestions (for full parameters, see: https://www.htslib.org/doc/samtools-view.html):
+  # if duplicates should be removed in this filtering, add "-F 0x0400" to the params
+  # if for each read, you only want to retain a single (best) mapping, add "-q 1" to params
+  # the analysis is automatically restricted to certain regions (e.g. excluding "blacklisted" regions):
+  # the -L option is activated if a path to a blacklist of the given genome exists in the
+  # downloaded "resources/ref/igenomes.yaml" or has been provided via the parameter
+  # "config['resources']['ref']['blacklist']" in this configuration file
+  samtools-view-se: "-b -F 0x004"
+  samtools-view-pe: "-b -F 0x004 -G 0x009 -f 0x001"
+  plotfingerprint:
+    # --numberOfSamples parameter of deeptools plotFingerprint, see: https://deeptools.readthedocs.io/en/develop/content/tools/plotFingerprint.html#Optional%20arguments
+    number-of-samples: 500000
+  # optional parameters for picard's CollectMultipleMetrics, which is run on the sorted, filtered and merged bam files in the post-analysis step
+  # see https://gatk.broadinstitute.org/hc/en-us/articles/360037594031-CollectMultipleMetrics-Picard-
+  collect-multiple-metrics: VALIDATION_STRINGENCY=LENIENT
   # TODO: move adapter parameters into a `adapter` column in units.tsv and check for its presence via the units.schema.yaml -- this enables unit-specific adapters, e.g. when integrating multiple datasets
   # these cutadapt parameters need to contain the required flag(s) for
   # the type of adapter(s) to trim, i.e.:

diff --git a/.test/config_paired_end/units.tsv b/.test/config_paired_end/units.tsv
@@ -1,7 +1,7 @@
-sample	unit	fragment_len_mean	fragment_len_sd	fq1	fq2	sra_accession	platform
-A	1			data/atacseq/test-datasets/testdata/SRR1822153_1.fastq.gz	data/atacseq/test-datasets/testdata/SRR1822153_2.fastq.gz		ILLUMINA
-B	1			data/atacseq/test-datasets/testdata/SRR1822154_1.fastq.gz	data/atacseq/test-datasets/testdata/SRR1822154_2.fastq.gz		ILLUMINA
-C	1	300	14	data/atacseq/test-datasets/testdata/SRR1822157_1.fastq.gz	data/atacseq/test-datasets/testdata/SRR1822157_2.fastq.gz		ILLUMINA
-D	1			data/atacseq/test-datasets/testdata/SRR1822158_1.fastq.gz	data/atacseq/test-datasets/testdata/SRR1822158_2.fastq.gz		ILLUMINA
-E	1			data/chipseq/test-datasets/testdata/SRR5204809_Spt5-ChIP_Input1_SacCer_ChIP-Seq_ss100k_R1.fastq.gz	data/chipseq/test-datasets/testdata/SRR5204809_Spt5-ChIP_Input1_SacCer_ChIP-Seq_ss100k_R2.fastq.gz		ILLUMINA
-F	1			data/chipseq/test-datasets/testdata/SRR5204810_Spt5-ChIP_Input2_SacCer_ChIP-Seq_ss100k_R1.fastq.gz	data/chipseq/test-datasets/testdata/SRR5204810_Spt5-ChIP_Input2_SacCer_ChIP-Seq_ss100k_R2.fastq.gz		ILLUMINA
+sample	unit	fq1	fq2	sra_accession	platform
+A	1	data/atacseq/test-datasets/testdata/SRR1822153_1.fastq.gz	data/atacseq/test-datasets/testdata/SRR1822153_2.fastq.gz		ILLUMINA
+B	1	data/atacseq/test-datasets/testdata/SRR1822154_1.fastq.gz	data/atacseq/test-datasets/testdata/SRR1822154_2.fastq.gz		ILLUMINA
+C	1	data/atacseq/test-datasets/testdata/SRR1822157_1.fastq.gz	data/atacseq/test-datasets/testdata/SRR1822157_2.fastq.gz		ILLUMINA
+D	1	data/atacseq/test-datasets/testdata/SRR1822158_1.fastq.gz	data/atacseq/test-datasets/testdata/SRR1822158_2.fastq.gz		ILLUMINA
+E	1	data/chipseq/test-datasets/testdata/SRR5204809_Spt5-ChIP_Input1_SacCer_ChIP-Seq_ss100k_R1.fastq.gz	data/chipseq/test-datasets/testdata/SRR5204809_Spt5-ChIP_Input1_SacCer_ChIP-Seq_ss100k_R2.fastq.gz		ILLUMINA
+F	1	data/chipseq/test-datasets/testdata/SRR5204810_Spt5-ChIP_Input2_SacCer_ChIP-Seq_ss100k_R1.fastq.gz	data/chipseq/test-datasets/testdata/SRR5204810_Spt5-ChIP_Input2_SacCer_ChIP-Seq_ss100k_R2.fastq.gz		ILLUMINA
diff --git a/.test/config_paired_end_reduced/config.yaml b/.test/config_paired_end_reduced/config.yaml
@@ -1,6 +1,6 @@
 # This file should contain everything to configure the workflow on a global scale.
 # In case of sample based data, it should be complemented by a samples.tsv file that contains
-# one row per sample. It can be parsed easily via pandas.
+# one row per sample. It is parsed in common.smk using pandas (https://pandas.pydata.org/).
 samples: "config_paired_end_reduced/samples.tsv"
 units: "config_paired_end_reduced/units.tsv"
 single_end: False
@@ -17,9 +17,9 @@ resources:
     release: 101
     # Genome build
     build: R64-1-1
-    # for testing data a specific chromosome can be selected
+    # for testing data a single chromosome can be selected (leave empty for a regular analysis)
     chromosome: VII
-    # specify release version number of igenomes list to use (see https://github.com/nf-core/chipseq/releases), default: 1.2.2
+    # specify release version number of igenomes list to use (see https://github.com/nf-core/chipseq/releases), e.g. 1.2.2
     igenomes_release: 1.2.2
     # if igenomes.yaml cannot be used, a value for the mappable or effective genome size can be specified here, e.g. macs-gsize: 2.7e9
     macs-gsize:
@@ -42,14 +42,29 @@ params:
   picard_metrics:
     activate: True
   deseq2:
-    # optional to run vst transform instead of rlog
+    # set to True to use the vst transformation instead of the rlog transformation for the DESeq2 analysis
     vst: False
   peak-annotation-analysis:
     activate: True
   peak-qc:
     activate: True
   consensus-peak-analysis:
     activate: True
+  # samtools view parameter suggestions (for full parameters, see: https://www.htslib.org/doc/samtools-view.html):
+  # if duplicates should be removed in this filtering, add "-F 0x0400" to the params
+  # if for each read, you only want to retain a single (best) mapping, add "-q 1" to params
+  # the analysis is automatically restricted to certain regions (e.g. excluding "blacklisted" regions):
+  # the -L option is activated if a path to a blacklist of the given genome exists in the
+  # downloaded "resources/ref/igenomes.yaml" or has been provided via the parameter
+  # "config['resources']['ref']['blacklist']" in this configuration file
+  samtools-view-se: "-b -F 0x004"
+  samtools-view-pe: "-b -F 0x004 -G 0x009 -f 0x001"
+  plotfingerprint:
+    # --numberOfSamples parameter of deeptools plotFingerprint, see: https://deeptools.readthedocs.io/en/develop/content/tools/plotFingerprint.html#Optional%20arguments
+    number-of-samples: 500000
+  # optional parameters for picard's CollectMultipleMetrics, which is run on the sorted, filtered and merged bam files in the post-analysis step
+  # see https://gatk.broadinstitute.org/hc/en-us/articles/360037594031-CollectMultipleMetrics-Picard-
+  collect-multiple-metrics: VALIDATION_STRINGENCY=LENIENT
   # TODO: move adapter parameters into a `adapter` column in units.tsv and check for its presence via the units.schema.yaml -- this enables unit-specific adapters, e.g. when integrating multiple datasets
   # these cutadapt parameters need to contain the required flag(s) for
   # the type of adapter(s) to trim, i.e.:

diff --git a/.test/config_paired_end_reduced/units.tsv b/.test/config_paired_end_reduced/units.tsv
@@ -1,6 +1,6 @@
-sample	unit	fragment_len_mean	fragment_len_sd	fq1	fq2	sra_accession	platform
-A	1			data/paired_end_test_data/A-1_vii_1.fastq.gz	data/paired_end_test_data/A-1_vii_2.fastq.gz		ILLUMINA
-B	1			data/paired_end_test_data/B-1_vii_1.fastq.gz	data/paired_end_test_data/B-1_vii_2.fastq.gz		ILLUMINA
-C	1	300	14	data/paired_end_test_data/C-1_vii_1.fastq.gz	data/paired_end_test_data/C-1_vii_2.fastq.gz		ILLUMINA
-D	1			data/paired_end_test_data/D-1_vii_1.fastq.gz	data/paired_end_test_data/D-1_vii_2.fastq.gz		ILLUMINA
-E	1			data/paired_end_test_data/E-1_vii_1.fastq.gz	data/paired_end_test_data/E-1_vii_2.fastq.gz		ILLUMINA
+sample	unit	fq1	fq2	sra_accession	platform
+A	1	data/paired_end_test_data/A-1_vii_1.fastq.gz	data/paired_end_test_data/A-1_vii_2.fastq.gz		ILLUMINA
+B	1	data/paired_end_test_data/B-1_vii_1.fastq.gz	data/paired_end_test_data/B-1_vii_2.fastq.gz		ILLUMINA
+C	1	data/paired_end_test_data/C-1_vii_1.fastq.gz	data/paired_end_test_data/C-1_vii_2.fastq.gz		ILLUMINA
+D	1	data/paired_end_test_data/D-1_vii_1.fastq.gz	data/paired_end_test_data/D-1_vii_2.fastq.gz		ILLUMINA
+E	1	data/paired_end_test_data/E-1_vii_1.fastq.gz	data/paired_end_test_data/E-1_vii_2.fastq.gz		ILLUMINA
diff --git a/.test/config_single_end/config.yaml b/.test/config_single_end/config.yaml
@@ -1,6 +1,6 @@
-# This file should contain everything to configure the workflow on a global scale.
+# This file contains everything to configure the workflow on a global scale.
 # In case of sample based data, it should be complemented by a samples.tsv file that contains
-# one row per sample. It can be parsed easily via pandas.
+# one row per sample. It is parsed in common.smk using pandas (https://pandas.pydata.org/).
 samples: "config_single_end/samples.tsv"
 # to download reads from SRA the accession numbers (see https://www.ncbi.nlm.nih.gov/sra) of samples must be given in
 # units.tsv dataset for testing this workflow with single end reads:
@@ -20,9 +20,9 @@ resources:
     release: 101
     # Genome build
     build: GRCh38
-    # for testing data a specific chromosome can be selected
+    # for testing data a single chromosome can be selected (leave empty for a regular analysis)
     chromosome:
-    # specify release version number of igenomes list to use (see https://github.com/nf-core/chipseq/releases), default: 1.2.2
+    # specify release version number of igenomes list to use (see https://github.com/nf-core/chipseq/releases), e.g. 1.2.2
     igenomes_release: 1.2.2
     # if igenomes.yaml cannot be used, a value for the mappable or effective genome size can be specified here, e.g. macs-gsize: 2.7e9
     macs-gsize:
@@ -45,14 +45,29 @@ params:
   picard_metrics:
     activate: True
   deseq2:
-    # optional to run vst transform instead of rlog
+    # set to True to use the vst transformation instead of the rlog transformation for the DESeq2 analysis
     vst: True
   peak-annotation-analysis:
     activate: True
   peak-qc:
     activate: True
   consensus-peak-analysis:
     activate: True
+  # samtools view parameter suggestions (for full parameters, see: https://www.htslib.org/doc/samtools-view.html):
+  # if duplicates should be removed in this filtering, add "-F 0x0400" to the params
+  # if for each read, you only want to retain a single (best) mapping, add "-q 1" to params
+  # the analysis is automatically restricted to certain regions (e.g. excluding "blacklisted" regions):
+  # the -L option is activated if a path to a blacklist of the given genome exists in the
+  # downloaded "resources/ref/igenomes.yaml" or has been provided via the parameter
+  # "config['resources']['ref']['blacklist']" in this configuration file
+  samtools-view-se: "-b -F 0x004"
+  samtools-view-pe: "-b -F 0x004 -G 0x009 -f 0x001"
+  plotfingerprint:
+    # --numberOfSamples parameter of deeptools plotFingerprint, see: https://deeptools.readthedocs.io/en/develop/content/tools/plotFingerprint.html#Optional%20arguments
+    number-of-samples: 500000
+  # optional parameters for picard's CollectMultipleMetrics, which is run on the sorted, filtered and merged bam files in the post-analysis step
+  # see https://gatk.broadinstitute.org/hc/en-us/articles/360037594031-CollectMultipleMetrics-Picard-
+  collect-multiple-metrics: VALIDATION_STRINGENCY=LENIENT
   # TODO: move adapter parameters into a `adapter` column in units.tsv and check for its presence via the units.schema.yaml -- this enables unit-specific adapters, e.g. when integrating multiple datasets
   # these cutadapt parameters need to contain the required flag(s) for
   # the type of adapter(s) to trim, i.e.: