1
1
version 1.0
2
2
3
- import "../../../tasks/Utility/PBUtils.wdl"
4
- import "../../../tasks/Utility/BAMutils.wdl" as BU
5
3
import "../../../tasks/Utility/GeneralUtils.wdl" as GU
6
-
7
- import "../../../tasks/Utility/Finalize.wdl" as FF
4
+ import "../../../tasks/Utility/Utils.wdl"
8
5
9
6
import "../../../tasks/Alignment/AlignAndCheckFingerprintCCS.wdl" as major
10
- import "../../../tasks/QC/AlignedMetrics.wdl"
7
+
8
+ import "../../TechAgnostic/Utility/LongReadsContaminationEstimation.wdl" as QC1
9
+ import "../../TechAgnostic/Utility/SexCheckNaive.wdl" as QC2
10
+ import "../../TechAgnostic/Utility/CountTheBeans.wdl" as QC3
11
+
12
+ import "../../../tasks/Utility/Finalize.wdl" as FF
11
13
12
14
workflow ProcessOnInstrumentDemuxedChunk {
13
15
14
16
meta {
15
17
description : "!!! WARN: THIS IS PROJECT-CENTER SPECIFIC !!! Given an on-instrument demultiplexed hifi_reads.bam, perform alignment and QC check."
16
18
}
17
19
20
+ parameter_meta {
21
+ readgroup_id : "ID of a readgroup; used for storing outputs; no whitespaces allowed"
22
+ bam_SM_field : "value to place in the SM field of the resulting BAM header's @RG lines"
23
+
24
+ platform : "PacBio platform used for generating the data; accepted value: [Sequel, Revio]"
25
+
26
+ fingerprint_store : "Path to the GCS folder holding fingerprint VCFs"
27
+ sample_id_at_store : "Sample ID at the fingerprint store for uniquely locating the fingerprint VCF (assumes some naming convention)"
28
+ vbid2_config_json : "Path to JSON file specifying config to be passed to VBID2 workflow (cross-individual contamination); currently only supports GRCh38."
29
+ expected_sex_type : "Expected sex type of the sample that generated the BAM; accepted value: [M, F, NA]; if provided, performs sex concordance check"
30
+ check_postaln_methyl_tags : "if true, gather statistics and reads without MM/ML tags from the aligned BAM"
31
+ }
32
+
18
33
input {
19
34
File uBAM
20
35
File ? uPBI
@@ -23,13 +38,20 @@ workflow ProcessOnInstrumentDemuxedChunk {
23
38
24
39
String bam_SM_field
25
40
26
- String fingerprint_store
27
- String sample_id_at_store
28
- Boolean turn_off_fingperprint_check = false
41
+ String platform
29
42
30
43
File ref_map_file
31
44
32
45
String gcs_out_root_dir
46
+
47
+ String disk_type = "SSD"
48
+
49
+ # args for optional QC subworkflows
50
+ String ? fingerprint_store
51
+ String ? sample_id_at_store
52
+ File ? vbid2_config_json
53
+ String ? expected_sex_type
54
+ Boolean check_postaln_methyl_tags = true
33
55
}
34
56
35
57
###################################################################################
@@ -39,38 +61,83 @@ workflow ProcessOnInstrumentDemuxedChunk {
39
61
String workflow_name = "ProcessOnInstrumentDemuxedChunk"
40
62
String outdir = sub (gcs_out_root_dir , "/$" , "" ) + "/" + workflow_name
41
63
42
- ###################################################################################
43
- call BU .GetReadGroupInfo as RG {input : bam = uBAM , keys = ['SM' , 'LB' , 'PU' ]}
64
+ if (defined (fingerprint_store ) != defined (sample_id_at_store )) {
65
+ call Utils .StopWorkflow { input : reason = "fingerprint_store and sample_id_at_store must be specified together or omitted together" }
66
+ }
44
67
68
+ ###################################################################################
45
69
# major work
70
+ String fp_store = select_first ([fingerprint_store , 'None' ])
71
+ String fp_smid = select_first ([sample_id_at_store , 'None' ])
72
+
46
73
call major .AlignAndCheckFingerprintCCS {
47
74
input :
48
75
uBAM = uBAM ,
49
76
uPBI = uPBI ,
50
77
bam_sample_name = bam_SM_field ,
51
- library = RG .read_group_info ['LB' ],
52
78
53
- turn_off_fingperprint_check = turn_off_fingperprint_check ,
54
- fp_store = fingerprint_store ,
55
- sample_id_at_store = sample_id_at_store ,
79
+ turn_off_fingperprint_check = !( defined ( fingerprint_store )) ,
80
+ fp_store = fp_store ,
81
+ sample_id_at_store = fp_smid ,
56
82
ref_map_file = ref_map_file
57
83
}
84
+ File aBAM = AlignAndCheckFingerprintCCS .aligned_bam
85
+ File aBAI = AlignAndCheckFingerprintCCS .aligned_bai
86
+ File aPBI = AlignAndCheckFingerprintCCS .aligned_pbi
87
+
88
+ ###################################################################################
89
+ # more QCs and metrics
90
+
91
+ # (optional) contamination
92
+ if (defined (vbid2_config_json )) {
93
+ Map [String , String ] vbid2_config = read_json (select_first ([vbid2_config_json ]))
94
+ call QC1 .LongReadsContaminationEstimation as VBID2 { input :
95
+ bam =aBAM ,
96
+ bai =aBAI ,
97
+ ref_map_file =ref_map_file ,
98
+ tech = platform ,
99
+ gt_sites_bed = vbid2_config ['genotyping_sites_bed' ],
100
+ is_hgdp_sites = vbid2_config ['is_HGDP_sites' ],
101
+ is_100k_sites = vbid2_config ['is_100K_sites' ],
102
+ disable_baq = vbid2_config ['disable_BAQ' ],
103
+ disk_type = disk_type ,
104
+ }
105
+ }
106
+
107
+ # (optional) sex concordance
108
+ if (defined (expected_sex_type )) {
109
+ call QC2 .SexCheckNaive as SexConcordance { input :
110
+ bam =aBAM ,
111
+ bai =aBAI ,
112
+ expected_sex_type =select_first ([expected_sex_type ]),
113
+ mosdepth_summary_txt =AlignAndCheckFingerprintCCS .coverage_per_chr
114
+ }
115
+ }
116
+
117
+ # (optional) verify methylation tags aren't missing
118
+ if (check_postaln_methyl_tags ) {
119
+ call QC3 .CountTheBeans as NoMissingBeans { input :
120
+ bam =aBAM ,
121
+ bai =aBAI ,
122
+ bam_descriptor ="POST_ALN" ,
123
+ gcs_out_root_dir =gcs_out_root_dir ,
124
+ use_local_ssd =disk_type =='LOCAL'
125
+ }
126
+ }
58
127
59
128
###################################################################################
60
129
# finalize
61
- String movie_name = RG .read_group_info ['PU' ]
62
130
String bc_specific_aln_out = outdir + '/alignments/' + readgroup_id
63
131
String bc_specific_metric_out = outdir + "/metrics/" + readgroup_id
64
132
65
- call FF .FinalizeToFile as FinalizeAlignedBam { input : outdir = bc_specific_aln_out , file = AlignAndCheckFingerprintCCS . aligned_bam , name = readgroup_id + '.bam' }
66
- call FF .FinalizeToFile as FinalizeAlignedBai { input : outdir = bc_specific_aln_out , file = AlignAndCheckFingerprintCCS . aligned_bai , name = readgroup_id + '.bai' }
67
- call FF .FinalizeToFile as FinalizeAlignedPbi { input : outdir = bc_specific_aln_out , file = AlignAndCheckFingerprintCCS . aligned_pbi , name = readgroup_id + '.pbi' }
133
+ call FF .FinalizeToFile as FinalizeAlignedBam { input : outdir = bc_specific_aln_out , file = aBAM , name = readgroup_id + '.bam' }
134
+ call FF .FinalizeToFile as FinalizeAlignedBai { input : outdir = bc_specific_aln_out , file = aBAI , name = readgroup_id + '.bai' }
135
+ call FF .FinalizeToFile as FinalizeAlignedPbi { input : outdir = bc_specific_aln_out , file = aPBI , name = readgroup_id + '.pbi' }
68
136
69
137
call FF .FinalizeToFile as FinalizePerChrCov { input : outdir = bc_specific_metric_out , file = AlignAndCheckFingerprintCCS .coverage_per_chr }
70
-
71
138
call FF .FinalizeToFile as FinalizeAlnMetrics { input : outdir = bc_specific_metric_out , file = AlignAndCheckFingerprintCCS .alignment_metrics_tar_gz }
72
139
73
- if (! turn_off_fingperprint_check ) {
140
+ if (defined ( fingerprint_store ) ) {
74
141
call FF .FinalizeToFile as FinalizeFPDetails { input : outdir = bc_specific_metric_out , file = select_first ([AlignAndCheckFingerprintCCS .fingerprint_detail_tar_gz ]) }
75
142
}
76
143
@@ -86,15 +153,27 @@ workflow ProcessOnInstrumentDemuxedChunk {
86
153
87
154
File coverage_per_chr = FinalizePerChrCov .gcs_path
88
155
89
- Map [String , Float ] alignment_metrics = AlignAndCheckFingerprintCCS .alignment_metrics
90
156
File alignment_metrics_tar_gz = FinalizeAlnMetrics .gcs_path
157
+ Map [String , Float ] alignment_metrics = AlignAndCheckFingerprintCCS .alignment_metrics
158
+ Map [String , Float ] sam_flag_stats = AlignAndCheckFingerprintCCS .sam_flag_stats
91
159
92
- String movie = movie_name
160
+ String movie = AlignAndCheckFingerprintCCS . movie
93
161
162
+ # the following QC/metrics aren't always available
163
+ # fingerprint QC
94
164
String ? fingerprint_check_result = AlignAndCheckFingerprintCCS .fp_status
95
165
Float ? fingerprint_check_LOD = AlignAndCheckFingerprintCCS .fp_lod_expected_sample
96
166
File ? fingerprint_check_tar_gz = FinalizeFPDetails .gcs_path
97
167
168
+ # contamination QC
169
+ Float ? contamination_est = VBID2 .contamination_est
170
+
171
+ # sex concordance QC
172
+ Map [String , String ]? inferred_sex_info = SexConcordance .inferred_sex_info
173
+
174
+ # post-alignment methylation
175
+ Map [String , String ]? methyl_tag_simple_stats = NoMissingBeans .methyl_tag_simple_stats
176
+
98
177
String last_processing_date = today .yyyy_mm_dd
99
178
}
100
179
}
0 commit comments