@@ -24,6 +24,10 @@ use FileHandle;
2424
2525use Data::Dumper;
2626
27+ my $WEIGHT = " weight" ;
28+ my $RECURRENCE = " recurrence" ;
29+ my $UNIQUE = " unique" ;
30+
2731sub new {
2832 my $class = shift ;
2933 my $this = {};
@@ -34,10 +38,11 @@ sub new {
3438 $this -> {' p_value_cutoff' } = 0.05;
3539 $this -> {' linear_cutoff' } = 0;
3640 $this -> {' max_radius' } = 10;
37- $this -> {' vertex_type' } = ' recurrence ' ;
41+ $this -> {' vertex_type' } = $RECURRENCE ;
3842 $this -> {' maf_file' } = undef ;
3943 $this -> {' amino_acid_header' } = " amino_acid_change" ;
4044 $this -> {' transcript_id_header' } = " transcript_name" ;
45+ $this -> {' weight_header' } = $WEIGHT ;
4146 bless $this , $class ;
4247 $this -> process();
4348 return $this ;
@@ -59,6 +64,7 @@ sub process {
5964 ' maf-file=s' => \$this -> {' maf_file' },
6065 ' amino-acid-header=s' => \$this -> {' amino_acid_header' },
6166 ' transcript-id-header=s' => \$this -> {' transcript_id_header' },
67+ ' weight-header=s' => \$this -> {' weight_header' },
6268 ' help' => \$help ,
6369 );
6470 if ( $help ) { print STDERR help_text(); exit 0; }
@@ -80,15 +86,16 @@ sub process {
8086 }
8187 unless ( $this -> {' pairwise_file' } ) { warn ' You must provide pairwise file! ' , " \n " ; die $this -> help_text(); }
8288 unless ( -e $this -> {' pairwise_file' } ) { warn " The input pairwise file (" .$this -> {' pairwise_file' }." ) does not exist! " , " \n " ; die $this -> help_text(); }
83- if ( $this -> {' vertex_type' } ne ' recurrence ' and $this -> {' vertex_type' } ne ' unique ' ) {
84- warn " vertex_type option not recognized as \' recurrence\' or \' unique \'\n " ;
89+ if ( $this -> {' vertex_type' } ne $RECURRENCE and $this -> {' vertex_type' } ne $UNIQUE and $this -> { ' vertex_type ' } ne $WEIGHT ) {
90+ warn " vertex_type option not recognized as \' recurrence\' , \' unique \' , or \' weight \'\n " ;
8591 warn " Using default vertex_type = \' recurrence\'\n " ;
86- $this -> {' vertex_type' } = ' recurrence ' ;
92+ $this -> {' vertex_type' } = $RECURRENCE ;
8793 }
88- unless ( ( $this -> {' maf_file' } ) and ( $this -> {' vertex_type' } ne ' unique ' ) ) { warn ' You must provide MAF file if not using unique vertex type! ' , " \n " ; die $this -> help_text(); }
89- unless ( ( -e $this -> {' maf_file' } ) and ( $this -> {' vertex_type' } ne ' unique ' ) ) { warn " The input MAF file )" .$this -> {' maf_file' }." ) does not exist! " , " \n " ; die $this -> help_text(); }
94+ unless ( ( $this -> {' maf_file' } ) and ( $this -> {' vertex_type' } ne $UNIQUE ) ) { warn ' You must provide MAF file if not using unique vertex type! ' , " \n " ; die $this -> help_text(); }
95+ unless ( ( -e $this -> {' maf_file' } ) and ( $this -> {' vertex_type' } ne $UNIQUE ) ) { warn " The input MAF file )" .$this -> {' maf_file' }." ) does not exist! " , " \n " ; die $this -> help_text(); }
9096 # # processing procedure
91- my ( %clusterings , %distance_matrix , %pdb_loc , %aa_map , %master , %mut_chrpos , %locations , %Variants );
97+ my ( %clusterings , %distance_matrix , %pdb_loc , %aa_map , %master , %mut_chrpos , %locations , %Variants , $WEIGHT );
98+ $WEIGHT = " weight" ;
9299# ####
93100# drug-mutation pairs
94101# ####
@@ -171,7 +178,7 @@ sub process {
171178 if ( grep { $_ eq $m2 } @{$aa_map {$gene2 }} ) {
172179 my @mutations = ();
173180 push @mutations , $first ;
174- if ( $this -> {' vertex_type' } eq ' unique ' ) { $m2 =~ s /\D +(\d +)\D +/ $1 / g ; }
181+ if ( $this -> {' vertex_type' } eq $UNIQUE ) { $m2 =~ s /\D +(\d +)\D +/ $1 / g ; }
175182 $second = $gene2 ." :" .$m2 ;
176183 push @mutations , $second ; # @mus2;
177184 my ( $dist , $pval ) = split " :" , $master {$first }{$second };
@@ -293,8 +300,8 @@ sub process {
293300 foreach ( keys %distance_matrix ) {
294301 $degree_connectivity {$_ } = scalar keys %{$distance_matrix {$_ }};
295302 }
296- if ( $this -> {' vertex_type' } ne ' unique ' ) {
297- print STDOUT " \n Preparing to get recurrence ...\n " ;
303+ if ( $this -> {' vertex_type' } ne $UNIQUE ) {
304+ print STDOUT " \n Preparing to get recurrence or weight ...\n " ;
298305 my %variants_from_pairs ;
299306 foreach my $id ( keys %clusterings ) {
300307 foreach my $gene_mut ( @{$clusterings {$id }} ) {
@@ -307,7 +314,7 @@ sub process {
307314 }
308315 }
309316 }
310- # #Mutation recurrence from MAF
317+ # #Mutation recurrence or weight from MAF
311318 my %mutations ;
312319 die " Could not open MAF file\n " unless ( $fh -> open ( $this -> {' maf_file' } , " r" ) );
313320 print STDOUT " \n Reading in MAF ...\n " ;
@@ -334,22 +341,32 @@ sub process {
334341 $mafcols {" Tumor_Sample_Barcode" },
335342 $mafcols {$this -> {" transcript_id_header" }},
336343 $mafcols {$this -> {" amino_acid_header" }} );
344+ if ( $this -> {' vertex_type' } eq $WEIGHT ) {
345+ unless ( defined ( $mafcols {$this -> {" weight_header" }} ) ) { die " \n " ; };
346+ push @mafcols , $mafcols {$this -> {" weight_header" }};
347+ }
337348 map {
338349 chomp ;
339350 my @line = split /\t/;
340351 if ( $#line >= $mafcols [-1] && $#line >= $mafcols [-2] ) { # makes sure custom maf cols are in range
341- my ( $gene , $chr , $start , $stop , $reference , $tumorAllele , $barID , $transcript_name , $aachange ) = @line [@mafcols ];
352+ my ( $gene , $chr , $start , $stop , $reference , $tumorAllele , $barID , $transcript_name , $aachange );
353+ my $weight = 1;
354+ if ( $this -> {' vertex_type' } eq $WEIGHT ) {
355+ ( $gene , $chr , $start , $stop , $reference , $tumorAllele , $barID , $transcript_name , $aachange , $weight ) = @line [@mafcols ];
356+ } else {
357+ ( $gene , $chr , $start , $stop , $reference , $tumorAllele , $barID , $transcript_name , $aachange ) = @line [@mafcols ];
358+ }
342359 my $variant = join ( " _" , ( $gene , $aachange , $chr , $start , $stop ) );
343360 if ( exists $variants_from_pairs {$variant } ) {
344361 my $gene_aachange = $gene ." :" .$aachange ;
345362 if ( exists $Variants {$gene_aachange } ) {
346363 if ( not exists $mutations {$variant }{$barID } ) {
347- $Variants {$gene_aachange }++ ;
348- $mutations {$variant }{$barID }++ ;
364+ $Variants {$gene_aachange } += $weight ;
365+ $mutations {$variant }{$barID } += $weight ;
349366 }
350367 } else {
351- $Variants {$gene_aachange } = 1 ;
352- $mutations {$variant }{$barID } = 1 ;
368+ $Variants {$gene_aachange } = $weight ;
369+ $mutations {$variant }{$barID } = $weight ;
353370 }
354371 }
355372 }
@@ -425,7 +442,7 @@ sub centroid{
425442 my $C = 0;
426443 foreach $other ( keys %{$dist {$current }}) {
427444 $weight = 1; # stays as 1 if vertex_type eq 'unique'
428- if ( $this -> {' vertex_type' } ne ' unique ' ) {
445+ if ( $this -> {' vertex_type' } ne $UNIQUE ) {
429446 if ( exists $Variants -> {$other } ) {
430447 $weight = $Variants -> {$other };
431448 }
@@ -435,13 +452,15 @@ sub centroid{
435452 $C += $weight /( 2**$dist {$current }{$other } );
436453 $count +=1;
437454 # print "$current\t$other\t$dist{$current}{$other}\n";
438- }
439- else {
455+ } else {
440456 $recluster =1;
441457 }
442- }
443- else {
444- $C += $weight -1;
458+ } else { # current is same as other
459+ if ( $this -> {' vertex_type' } eq $WEIGHT ) {
460+ $C += $weight ;
461+ } else {
462+ $C += $weight -1;
463+ }
445464 }
446465 $centrality {$clus_num }{$current } = $C ;
447466 if ( $C > $max ) {
@@ -605,10 +624,11 @@ Usage: hotspot3d cluster [options]
605624--p-value-cutoff P_value cutoff (<), default: 0.05
606625--linear-cutoff Linear distance cutoff (> peptides), default: 20
607626--max-radius Maximum cluster radius (max network geodesic from centroid, <= Angstroms), default: 10
608- --vertex-type Graph vertex type (recurrence or unique ), default: recurrence
627+ --vertex-type Graph vertex type (recurrence, unique, or weight ), default: recurrence
609628--maf-file MAF file used in proximity search step (used if vertex-type = recurrence)
610629--transcript-id-header MAF file column header for transcript id's, default: transcript_name
611630--amino-acid-header MAF file column header for amino acid changes, default: amino_acid_change
631+ --weight-header MAF file column header for mutation weight, default: weight (used if vertex-type = weight)
612632
613633--help this message
614634
0 commit comments