Skip to content

Commit a9ae6c3

Browse files
committed
Incorporate more QC subworkflows into the new subworkflow
* contamination * sex concordance * post-aln methylation check
1 parent d5975e1 commit a9ae6c3

File tree

2 files changed

+133
-23
lines changed

2 files changed

+133
-23
lines changed
Lines changed: 101 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,35 @@
11
version 1.0
22

3-
import "../../../tasks/Utility/PBUtils.wdl"
4-
import "../../../tasks/Utility/BAMutils.wdl" as BU
53
import "../../../tasks/Utility/GeneralUtils.wdl" as GU
6-
7-
import "../../../tasks/Utility/Finalize.wdl" as FF
4+
import "../../../tasks/Utility/Utils.wdl"
85

96
import "../../../tasks/Alignment/AlignAndCheckFingerprintCCS.wdl" as major
10-
import "../../../tasks/QC/AlignedMetrics.wdl"
7+
8+
import "../../TechAgnostic/Utility/LongReadsContaminationEstimation.wdl" as QC1
9+
import "../../TechAgnostic/Utility/SexCheckNaive.wdl" as QC2
10+
import "../../TechAgnostic/Utility/CountTheBeans.wdl" as QC3
11+
12+
import "../../../tasks/Utility/Finalize.wdl" as FF
1113

1214
workflow ProcessOnInstrumentDemuxedChunk {
1315

1416
meta {
1517
desciption: "!!! WARN: THIS IS PROJECT-CENTER SPECIFIC !!! Given an on-instrument demultiplexed hifi_reads.bam, perform alignment and QC check."
1618
}
1719

20+
parameter_meta {
21+
readgroup_id: "ID of a readgroup; used for storing outputs; no whitespaces allowed"
22+
bam_SM_field: "value to place in the SM field of the resulting BAM header's @RG lines"
23+
24+
platform: "PacBio platform used for generating the data; accepted value: [Sequel, Revio]"
25+
26+
fingerprint_store: "Path to the GCS folder holding fingerprint VCFs"
27+
sample_id_at_store: "Sampld ID at the fingerprint store for uniquely locating the fingerprint VCF (assumes some naming convention)"
28+
vbid2_config_json: "Path to Json file specifying config to be passed to VBID2 workflow (cross-individual contamination); currently only supports GRCh38."
29+
expected_sex_type: "Expected sex type of the sample that generated the BAM; accepted value: [M, F, NA]; if provided, performs sex concordance check"
30+
check_postaln_methyl_tags: "if true, gather statistics and reads without MM/ML tags from the aligned BAM"
31+
}
32+
1833
input {
1934
File uBAM
2035
File? uPBI
@@ -23,13 +38,20 @@ workflow ProcessOnInstrumentDemuxedChunk {
2338

2439
String bam_SM_field
2540

26-
String fingerprint_store
27-
String sample_id_at_store
28-
Boolean turn_off_fingperprint_check = false
41+
String platform
2942

3043
File ref_map_file
3144

3245
String gcs_out_root_dir
46+
47+
String disk_type = "SSD"
48+
49+
# args for optional QC subworkflows
50+
String? fingerprint_store
51+
String? sample_id_at_store
52+
File? vbid2_config_json
53+
String? expected_sex_type
54+
Boolean check_postaln_methyl_tags = true
3355
}
3456

3557
###################################################################################
@@ -39,38 +61,83 @@ workflow ProcessOnInstrumentDemuxedChunk {
3961
String workflow_name = "ProcessOnInstrumentDemuxedChunk"
4062
String outdir = sub(gcs_out_root_dir, "/$", "") + "/" + workflow_name
4163

42-
###################################################################################
43-
call BU.GetReadGroupInfo as RG {input: bam = uBAM, keys = ['SM', 'LB', 'PU']}
64+
if (defined(fingerprint_store) != defined(sample_id_at_store)) {
65+
call Utils.StopWorkflow { input: reason = "fingerprint_store and sample_id_at_store must be specified together or omitted together" }
66+
}
4467
68+
###################################################################################
4569
# major work
70+
String fp_store = select_first([fingerprint_store, 'None'])
71+
String fp_smid = select_first([sample_id_at_store, 'None'])
72+
4673
call major.AlignAndCheckFingerprintCCS {
4774
input:
4875
uBAM = uBAM,
4976
uPBI = uPBI,
5077
bam_sample_name = bam_SM_field,
51-
library = RG.read_group_info['LB'],
5278
53-
turn_off_fingperprint_check = turn_off_fingperprint_check,
54-
fp_store = fingerprint_store,
55-
sample_id_at_store = sample_id_at_store,
79+
turn_off_fingperprint_check = !(defined(fingerprint_store)),
80+
fp_store = fp_store,
81+
sample_id_at_store = fp_smid,
5682
ref_map_file = ref_map_file
5783
}
84+
File aBAM = AlignAndCheckFingerprintCCS.aligned_bam
85+
File aBAI = AlignAndCheckFingerprintCCS.aligned_bai
86+
File aPBI = AlignAndCheckFingerprintCCS.aligned_pbi
87+
88+
###################################################################################
89+
# more QCs and metrics
90+
91+
# (optional) contamination
92+
if (defined(vbid2_config_json)) {
93+
Map[String, String] vbid2_config = read_json(select_first([vbid2_config_json]))
94+
call QC1.LongReadsContaminationEstimation as VBID2 { input:
95+
bam=aBAM,
96+
bai=aBAI,
97+
ref_map_file=ref_map_file,
98+
tech = platform,
99+
gt_sites_bed = vbid2_config['genotyping_sites_bed'],
100+
is_hgdp_sites = vbid2_config['is_HGDP_sites'],
101+
is_100k_sites = vbid2_config['is_100K_sites'],
102+
disable_baq = vbid2_config['disable_BAQ'],
103+
disk_type = disk_type,
104+
}
105+
}
106+
107+
# (optional) sex concordance
108+
if (defined(expected_sex_type)) {
109+
call QC2.SexCheckNaive as SexConcordance { input:
110+
bam=aBAM,
111+
bai=aBAI,
112+
expected_sex_type=select_first([expected_sex_type]),
113+
mosdepth_summary_txt=AlignAndCheckFingerprintCCS.coverage_per_chr
114+
}
115+
}
116+
117+
# (optional) verify methylation tags aren't missing
118+
if (check_postaln_methyl_tags) {
119+
call QC3.CountTheBeans as NoMissingBeans { input:
120+
bam=aBAM,
121+
bai=aBAI,
122+
bam_descriptor="POST_ALN",
123+
gcs_out_root_dir=gcs_out_root_dir,
124+
use_local_ssd=disk_type=='LOCAL'
125+
}
126+
}
58127
59128
###################################################################################
60129
# finalize
61-
String movie_name = RG.read_group_info['PU']
62130
String bc_specific_aln_out = outdir + '/alignments/' + readgroup_id
63131
String bc_specific_metric_out = outdir + "/metrics/" + readgroup_id
64132
65-
call FF.FinalizeToFile as FinalizeAlignedBam { input: outdir = bc_specific_aln_out, file = AlignAndCheckFingerprintCCS.aligned_bam, name = readgroup_id + '.bam' }
66-
call FF.FinalizeToFile as FinalizeAlignedBai { input: outdir = bc_specific_aln_out, file = AlignAndCheckFingerprintCCS.aligned_bai, name = readgroup_id + '.bai' }
67-
call FF.FinalizeToFile as FinalizeAlignedPbi { input: outdir = bc_specific_aln_out, file = AlignAndCheckFingerprintCCS.aligned_pbi, name = readgroup_id + '.pbi' }
133+
call FF.FinalizeToFile as FinalizeAlignedBam { input: outdir = bc_specific_aln_out, file = aBAM, name = readgroup_id + '.bam' }
134+
call FF.FinalizeToFile as FinalizeAlignedBai { input: outdir = bc_specific_aln_out, file = aBAI, name = readgroup_id + '.bai' }
135+
call FF.FinalizeToFile as FinalizeAlignedPbi { input: outdir = bc_specific_aln_out, file = aPBI, name = readgroup_id + '.pbi' }
68136
69137
call FF.FinalizeToFile as FinalizePerChrCov { input: outdir = bc_specific_metric_out, file = AlignAndCheckFingerprintCCS.coverage_per_chr }
70-
71138
call FF.FinalizeToFile as FinalizeAlnMetrics { input: outdir = bc_specific_metric_out, file = AlignAndCheckFingerprintCCS.alignment_metrics_tar_gz }
72139
73-
if (! turn_off_fingperprint_check) {
140+
if (defined(fingerprint_store)) {
74141
call FF.FinalizeToFile as FinalizeFPDetails { input: outdir = bc_specific_metric_out, file = select_first([AlignAndCheckFingerprintCCS.fingerprint_detail_tar_gz]) }
75142
}
76143
@@ -86,15 +153,27 @@ workflow ProcessOnInstrumentDemuxedChunk {
86153

87154
File coverage_per_chr = FinalizePerChrCov.gcs_path
88155

89-
Map[String, Float] alignment_metrics = AlignAndCheckFingerprintCCS.alignment_metrics
90156
File alignment_metrics_tar_gz = FinalizeAlnMetrics.gcs_path
157+
Map[String, Float] alignment_metrics = AlignAndCheckFingerprintCCS.alignment_metrics
158+
Map[String, Float] sam_flag_stats = AlignAndCheckFingerprintCCS.sam_flag_stats
91159

92-
String movie = movie_name
160+
String movie = AlignAndCheckFingerprintCCS.movie
93161

162+
# the following QC/metrics aren't always available
163+
# fingerprint QC
94164
String? fingerprint_check_result = AlignAndCheckFingerprintCCS.fp_status
95165
Float? fingerprint_check_LOD = AlignAndCheckFingerprintCCS.fp_lod_expected_sample
96166
File? fingerprint_check_tar_gz = FinalizeFPDetails.gcs_path
97167

168+
# contamination QC
169+
Float? contamination_est = VBID2.contamination_est
170+
171+
# sex concordance QC
172+
Map[String, String]? inferred_sex_info = SexConcordance.inferred_sex_info
173+
174+
# post-alignment methylation
175+
Map[String, String]? methyl_tag_simple_stats = NoMissingBeans.methyl_tag_simple_stats
176+
98177
String last_processing_date = today.yyyy_mm_dd
99178
}
100179
}

wdl/pipelines/PacBio/Utility/ProcessOnInstrumentDemuxedChunkRefFree.wdl

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,17 +8,29 @@ import "../../../tasks/Utility/GeneralUtils.wdl" as GU
88

99
import "../../../tasks/Utility/Finalize.wdl" as FF
1010

11+
import "../../TechAgnostic/Utility/CountTheBeans.wdl" as QC1
12+
import "../../TechAgnostic/Utility/DystPeaker.wdl" as QC2
13+
import "../../TechAgnostic/Utility/FASTQstats.wdl" as QC3
14+
1115
workflow ProcessOnInstrumentDemuxedChunkRefFree {
1216

1317
meta {
1418
desciption: "!!! WARN: THIS IS PROJECT-CENTER SPECIFIC !!! Given an on-instrument demultiplexed hifi_reads.bam, perform ref-independent prep work."
1519
}
1620

21+
parameter_meta {
22+
bam_descriptor: "a one-word description of the input BAM (used for saving the reads that don't have any MM/ML tags)"
23+
short_reads_threshold: "a length threshold below which reads are classified as short"
24+
}
25+
1726
input {
1827
File uBAM
1928

2029
String readgroup_id
2130

31+
String bam_descriptor
32+
Int short_reads_threshold
33+
2234
String gcs_out_root_dir
2335
}
2436

@@ -35,14 +47,22 @@ workflow ProcessOnInstrumentDemuxedChunkRefFree {
3547
call NanoPlot.NanoPlotFromUBam {input: uBAM = uBAM}
3648
3749
call BU.BamToFastq {input: bam = uBAM, prefix = "does_not_matter"}
50+
3851
###################################################################################
3952
# finalize
4053
call BU.GetReadGroupInfo as RG {input: bam = uBAM, keys = ['PU']}
4154
String movie_name = RG.read_group_info['PU']
42-
4355
String bc_specific_fastq_out = outdir_ref_free + '/' + readgroup_id
56+
4457
call FF.FinalizeToFile as FinalizeFQ { input: outdir = bc_specific_fastq_out, file = BamToFastq.reads_fq, name = readgroup_id + '.hifi.fq.gz' }
4558
59+
###################################################################################
60+
# more QCs and metrics
61+
62+
call QC1.CountTheBeans { input: bam=uBAM, bam_descriptor=bam_descriptor, gcs_out_root_dir=gcs_out_root_dir }
63+
call QC2.DystPeaker { input: input_file=uBAM, input_is_bam=true, id=readgroup_id, short_reads_threshold=short_reads_threshold, gcs_out_root_dir=gcs_out_root_dir }
64+
call QC3.FASTQstats { input: reads=BamToFastq.reads_fq, file_type='FASTQ' }
65+
4666
###################################################################################
4767
4868
call GU.GetTodayDate as today {}
@@ -51,7 +71,18 @@ workflow ProcessOnInstrumentDemuxedChunkRefFree {
5171
String movie = movie_name
5272

5373
File hifi_fq = FinalizeFQ.gcs_path
74+
# todo merge these two together
5475
Map[String, Float] hifi_stats = NanoPlotFromUBam.stats_map
76+
Map[String, Float] hifi_fq_stats = FASTQstats.stats
77+
78+
# read length metrics
79+
File read_len_hist = DystPeaker.read_len_hist
80+
Array[Int] read_len_peaks = DystPeaker.read_len_peaks
81+
Array[Int] read_len_deciles = DystPeaker.read_len_deciles
82+
Map[String, String] read_len_summaries = DystPeaker.read_len_summaries
83+
84+
# methylation call rate stats
85+
Map[String, String] methyl_tag_simple_stats = CountTheBeans.methyl_tag_simple_stats
5586

5687
String last_processing_date = today.yyyy_mm_dd
5788
}

0 commit comments

Comments
 (0)