1
1
#!/bin/python
2
2
# CharGer - Characterization of Germline variants
3
3
# author: Adam D Scott ([email protected] ) & Kuan-lin Huang ([email protected] )
4
- # version: v0.2.1 - 2017*05
4
+ # version: v0.3.0 - 2017*09
5
5
6
6
import sys
7
7
import getopt
8
8
from charger import charger
9
9
import time
10
+ import argparse
10
11
11
12
def parseArgs ( argv ):
12
- helpText = "\n CharGer - v0.2.1 \n \n "
13
+ helpText = "\n CharGer - v0.3.0 \n \n "
13
14
helpText += "Usage: "
14
15
helpText += "charger <input file> [options]\n \n "
15
16
helpText += "Accepted input data files:\n "
@@ -38,7 +39,10 @@ def parseArgs( argv ):
38
39
helpText += " -a assumed de novo file, standard .maf\n "
39
40
helpText += " -c co-segregation file, standard .maf\n "
40
41
helpText += " -H HotSpot3D clusters file, .clusters\n "
41
- helpText += " -r recurrence threshold (default = 2)\n "
42
+ helpText += "Thresholds:\n "
43
+ helpText += " --recurrence-threshold HotSpot3D recurrence threshold (default = 2)\n "
44
+ helpText += " --rare-threshold Allele frequency threshold for rare (default = 0.0005 (0.05%)):\n "
45
+ helpText += " --common-threshold Allele frequency threshold for common (default = 0.05 (5%)):\n "
42
46
helpText += "Local VEP (works with .vcf input only; suppresses ReST too):\n "
43
47
helpText += " --vep-script Path to VEP\n "
44
48
helpText += " --vep-config config-file for VEP\n "
@@ -57,9 +61,9 @@ def parseArgs( argv ):
57
61
helpText += " --mac-clinvar-tsv ClinVar from MacArthur lab (clinvar_alleles.tsv.gz)\n "
58
62
#helpText += " --mac-clinvar-vcf ClinVar from MacArthur lab (clinvar_alleles.vcf.gz)\n"
59
63
helpText += "Filters:\n "
60
- helpText += " --rare Allele frequency threshold for rare/common (default = 1, process variant with any frequency):\n "
61
- helpText += " --vcf-any-filter Allow variants that do not pass all filters in .vcf input (flag)\n "
62
- helpText += " --mutation-types Comma delimited list (no spaces) of types to allow \n "
64
+ helpText += " --frequency-filter Keep if allele frequency lower (default = 1, process variant with any frequency):\n "
65
+ helpText += " --vcf-any-filter Keep variants that do not pass all filters in .vcf input (flag)\n "
66
+ helpText += " --mutation-types Keep types, as a comma delimited list (no spaces)\n "
63
67
helpText += "ReST batch sizes:\n "
64
68
helpText += " -v VEP (#variants, default/max allowed = 150)\n "
65
69
helpText += " -b ClinVar summary (#variants, default/max allowed = 500)\n "
@@ -76,7 +80,6 @@ def parseArgs( argv ):
76
80
helpText += " -C codon\n "
77
81
helpText += " -p peptide change\n "
78
82
helpText += " -L variant classification\n "
79
- helpText += " -F allele frequency\n "
80
83
helpText += "\n "
81
84
helpText += " -h this message\n "
82
85
helpText += "\n "
@@ -117,35 +120,38 @@ def parseArgs( argv ):
117
120
clustersFile = None
118
121
pathogenicVariantsFile = None
119
122
annotateInput = ""
120
- vepScript = ""
121
- vepConfig = ""
122
- #vepDir = None
123
+ vepScript = None
124
+ vepConfig = None
123
125
vepCache = None
124
126
vepOutput = None
125
- ensemblRelease = ""
126
- vepVersion = ""
127
- grch = ""
128
- fork = ""
127
+ ensemblRelease = str ( 75 )
128
+ vepVersion = str ( 87 )
129
+ grch = str ( 37 )
130
+ fork = str ( 1 )
129
131
referenceFasta = None
130
132
exacVCF = None
131
133
macClinVarVCF = None
132
134
macClinVarTSV = None
133
135
doURLTest = True
134
- thresholdAF = 1
136
+ rareAF = 0.0005 #from germline studies
137
+ commonAF = 0.05 #from ACMG suggestion
138
+ keepAF = 1
135
139
anyFilter = False
136
140
mutationTypes = []
137
141
138
142
try :
139
143
#haven't used ijqruy
140
- charCommands = "DEtlxhwOkX:s:A:R:S:P:M:G:m:f:T:o:v:b:B:p:C:F: g:d:e:n:a:c:r :H:z:L:"
144
+ charCommands = "DEtlxhwOkX:s:A:R:S:P:M:G:m:f:T:o:v:b:B:p:C:g:d:e:n:a:c:H:z:L:"
141
145
opts , args = getopt .getopt ( argv , charCommands , \
142
146
["maf=" , "vcf=" , "tsv=" , "output=" , "use-tcga" , \
143
147
"run-vep" , "run-clinvar" , "run-exac" , \
144
148
"vepBatchSize=" , "summaryBatchSize=" , "searchBatchSize=" , \
145
149
"peptideChange=" , "codon=" , "alleleFrequency=" , \
146
150
"geneList=" , "diseases=" , "expression=" , \
147
151
"deNovo=" , "assumedDeNovo=" , "coSegregation=" , \
148
- "recurrence=" , "rare=" , "vcf-any-filter" , "mutation-types=" , \
152
+ "rare-threshold=" , "common-threshold=" , \
153
+ "recurrence-threshold=" , "frequency-filter=" , \
154
+ "vcf-any-filter" , "mutation-types=" , \
149
155
"hotspot3d=" , "pathogenicVariants=" , \
150
156
"vep-script=" , "vep-config=" , "vep-dir=" , "vep-cache=" , "vep-output=" , \
151
157
"ensembl-release=" , "vep-version=" , \
@@ -233,10 +239,14 @@ def parseArgs( argv ):
233
239
asHTML = True
234
240
elif opt in ( "-O" , "--override" ):
235
241
override = True
236
- elif opt in ( "-r" , "--recurrence" ):
242
+ elif opt in ( "-r" , "--recurrence-threshold " ):
237
243
recurrenceThreshold = float ( arg )
238
- elif opt in ( "--rare" ):
239
- thresholdAF = float ( arg )
244
+ elif opt in ( "--rare-threshold" ):
245
+ rareAF = float ( arg )
246
+ elif opt in ( "--common-threshold" ):
247
+ commonAF = float ( arg )
248
+ elif opt in ( "--frequency-filter" ):
249
+ keepAF = float ( arg )
240
250
elif opt in ( "--mutation-types" ):
241
251
mutationTypes = arg .split ( "," )
242
252
elif opt in ( "--vcf-any-filter" ):
@@ -315,7 +325,9 @@ def parseArgs( argv ):
315
325
"variantClassificationColumn" : variantClassificationColumn , \
316
326
"alleleFrequencyColumn" : alleleFrequencyColumn , \
317
327
"recurrenceThreshold" : recurrenceThreshold , \
318
- "thresholdAF" : thresholdAF , \
328
+ "rareAF" : rareAF , \
329
+ "commonAF" : commonAF , \
330
+ "keepAF" : keepAF , \
319
331
"anyFilter" : anyFilter , \
320
332
"mutationTypes" : mutationTypes , \
321
333
"clustersFile" : clustersFile , \
@@ -378,7 +390,9 @@ def main( argv ):
378
390
asHTML = values ["html" ]
379
391
override = values ["override" ]
380
392
recurrenceThreshold = values ["recurrenceThreshold" ]
381
- thresholdAF = values ["thresholdAF" ]
393
+ rareAF = values ["rareAF" ]
394
+ commonAF = values ["commonAF" ]
395
+ keepAF = values ["keepAF" ]
382
396
anyFilter = values ["anyFilter" ]
383
397
mutationTypes = values ["mutationTypes" ]
384
398
clustersFile = values ["clustersFile" ]
@@ -452,7 +466,9 @@ def main( argv ):
452
466
peptideChange = peptideChangeColumn , \
453
467
variantClassification = variantClassificationColumn , \
454
468
alleleFrequency = alleleFrequencyColumn , \
455
- thresholdAF = thresholdAF , \
469
+ rareAF = rareAF , \
470
+ commonAF = commonAF , \
471
+ keepAF = keepAF , \
456
472
anyFilter = anyFilter , \
457
473
mutationTypes = mutationTypes , \
458
474
)
@@ -495,16 +511,16 @@ def main( argv ):
495
511
exacVCF = exacVCF , \
496
512
macClinVarTSV = macClinVarTSV , \
497
513
macClinVarVCF = macClinVarVCF , \
498
- thresholdAF = thresholdAF , \
514
+ rareAF = rareAF , \
515
+ commonAF = commonAF , \
516
+ keepAF = keepAF , \
499
517
anyFilter = anyFilter , \
500
518
mutationTypes = mutationTypes , \
501
519
#timeout=(20,20) , \
502
520
)
503
521
504
522
t3 = time .time ()
505
523
506
- rareThreshold = 0.0005 #from germline studies
507
- commonThreshold = 0.05 #from ACMG suggestion
508
524
minimumEvidence = 2
509
525
510
526
CharGer .PVS1 ( )
@@ -513,7 +529,7 @@ def main( argv ):
513
529
CharGer .PS3 ( )
514
530
CharGer .PS4 ( )
515
531
CharGer .PM1 ( recurrenceThreshold , hotspot3d = clustersFile )
516
- CharGer .PM2 ( rareThreshold )
532
+ CharGer .PM2 ( rareAF )
517
533
CharGer .PM3 ( )
518
534
CharGer .PM4 ( )
519
535
CharGer .PM5 ( )
@@ -524,7 +540,7 @@ def main( argv ):
524
540
CharGer .PP4 ( )
525
541
CharGer .PP5 ( )
526
542
527
- CharGer .BA1 ( commonThreshold )
543
+ CharGer .BA1 ( commonAF )
528
544
CharGer .BS1 ( )
529
545
CharGer .BS2 ( )
530
546
CharGer .BS3 ( )
@@ -537,10 +553,15 @@ def main( argv ):
537
553
CharGer .BP6 ( )
538
554
CharGer .BP7 ( )
539
555
556
+ CharGer .PSC1 ( )
557
+ CharGer .PMC1 ( )
558
+ CharGer .PPC1 ( )
559
+ CharGer .PPC2 ( )
560
+
540
561
CharGer .BSC1 ( )
541
562
CharGer .BMC1 ( )
542
563
543
- print ( str ( rareThreshold ) + " < " + str ( commonThreshold ) )
564
+ print ( str ( rareAF ) + " < " + str ( commonAF ) )
544
565
t4 = time .time ()
545
566
546
567
CharGer .classify ( system = "ACMG" )
0 commit comments