Skip to content

Commit 5c00b12

Browse files
committed
new cluster option, custom weighting
1 parent 1055bb4 commit 5c00b12

File tree

4 files changed

+45
-25
lines changed

4 files changed

+45
-25
lines changed

HotSpot3D-0.5.0.tar.gz

8.24 MB
Binary file not shown.

bin/hotspot3d

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
use strict;
99
use warnings;
1010

11-
our $VERSION = 'V0.4';
11+
our $VERSION = 'V0.5.0';
1212

1313
use Carp;
1414
use FileHandle;

dist.ini

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
name = HotSpot3D
22
author = Beifang Niu, John Wallis, Adam D Scott, & Sohini Sengupta from McDonnell Genome Institute of Washington University at St. Louis
3-
version = 0.4
3+
version = 0.5.0
44
license = Perl_5
55
copyright_holder = McDonnell Genome Institute at Washington University
66
copyright_year = 2013

lib/TGI/Mutpro/Main/Cluster.pm

Lines changed: 43 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,10 @@ use FileHandle;
2424

2525
use Data::Dumper;
2626

27+
my $WEIGHT = "weight";
28+
my $RECURRENCE = "recurrence";
29+
my $UNIQUE = "unique";
30+
2731
sub new {
2832
my $class = shift;
2933
my $this = {};
@@ -34,10 +38,11 @@ sub new {
3438
$this->{'p_value_cutoff'} = 0.05;
3539
$this->{'linear_cutoff'} = 0;
3640
$this->{'max_radius'} = 10;
37-
$this->{'vertex_type'} = 'recurrence';
41+
$this->{'vertex_type'} = $RECURRENCE;
3842
$this->{'maf_file'} = undef;
3943
$this->{'amino_acid_header'} = "amino_acid_change";
4044
$this->{'transcript_id_header'} = "transcript_name";
45+
$this->{'weight_header'} = $WEIGHT;
4146
bless $this, $class;
4247
$this->process();
4348
return $this;
@@ -59,6 +64,7 @@ sub process {
5964
'maf-file=s' => \$this->{'maf_file'},
6065
'amino-acid-header=s' => \$this->{'amino_acid_header'},
6166
'transcript-id-header=s' => \$this->{'transcript_id_header'},
67+
'weight-header=s' => \$this->{'weight_header'},
6268
'help' => \$help,
6369
);
6470
if ( $help ) { print STDERR help_text(); exit 0; }
@@ -80,15 +86,16 @@ sub process {
8086
}
8187
unless( $this->{'pairwise_file'} ) { warn 'You must provide pairwise file! ', "\n"; die $this->help_text(); }
8288
unless( -e $this->{'pairwise_file'} ) { warn "The input pairwise file (".$this->{'pairwise_file'}.") does not exist! ", "\n"; die $this->help_text(); }
83-
if ( $this->{'vertex_type'} ne 'recurrence' and $this->{'vertex_type'} ne 'unique' ) {
84-
warn "vertex_type option not recognized as \'recurrence\' or \'unique\'\n";
89+
if ( $this->{'vertex_type'} ne $RECURRENCE and $this->{'vertex_type'} ne $UNIQUE and $this->{'vertex_type'} ne $WEIGHT ) {
90+
warn "vertex_type option not recognized as \'recurrence\', \'unique\', or \'weight\'\n";
8591
warn "Using default vertex_type = \'recurrence\'\n";
86-
$this->{'vertex_type'} = 'recurrence';
92+
$this->{'vertex_type'} = $RECURRENCE;
8793
}
88-
unless( ( $this->{'maf_file'} ) and ( $this->{'vertex_type'} ne 'unique' ) ) { warn 'You must provide MAF file if not using unique vertex type! ', "\n"; die $this->help_text(); }
89-
unless( ( -e $this->{'maf_file'} ) and ( $this->{'vertex_type'} ne 'unique' ) ) { warn "The input MAF file )".$this->{'maf_file'}.") does not exist! ", "\n"; die $this->help_text(); }
94+
unless( ( $this->{'maf_file'} ) and ( $this->{'vertex_type'} ne $UNIQUE ) ) { warn 'You must provide MAF file if not using unique vertex type! ', "\n"; die $this->help_text(); }
95+
unless( ( -e $this->{'maf_file'} ) and ( $this->{'vertex_type'} ne $UNIQUE ) ) { warn "The input MAF file )".$this->{'maf_file'}.") does not exist! ", "\n"; die $this->help_text(); }
9096
## processing procedure
91-
my ( %clusterings , %distance_matrix , %pdb_loc, %aa_map, %master, %mut_chrpos , %locations , %Variants );
97+
my ( %clusterings , %distance_matrix , %pdb_loc, %aa_map, %master, %mut_chrpos , %locations , %Variants , $WEIGHT );
98+
$WEIGHT = "weight";
9299
#####
93100
# drug-mutation pairs
94101
#####
@@ -171,7 +178,7 @@ sub process {
171178
if ( grep{ $_ eq $m2 } @{$aa_map{$gene2}} ) {
172179
my @mutations = ();
173180
push @mutations , $first;
174-
if ( $this->{'vertex_type'} eq 'unique' ) { $m2 =~ s/\D+(\d+)\D+/$1/g; }
181+
if ( $this->{'vertex_type'} eq $UNIQUE ) { $m2 =~ s/\D+(\d+)\D+/$1/g; }
175182
$second = $gene2.":".$m2;
176183
push @mutations , $second; #@mus2;
177184
my ( $dist , $pval ) = split ":" , $master{$first}{$second};
@@ -293,8 +300,8 @@ sub process {
293300
foreach ( keys %distance_matrix ) {
294301
$degree_connectivity{$_} = scalar keys %{$distance_matrix{$_}};
295302
}
296-
if ( $this->{'vertex_type'} ne 'unique' ) {
297-
print STDOUT "\nPreparing to get recurrence ...\n";
303+
if ( $this->{'vertex_type'} ne $UNIQUE ) {
304+
print STDOUT "\nPreparing to get recurrence or weight ...\n";
298305
my %variants_from_pairs;
299306
foreach my $id ( keys %clusterings ) {
300307
foreach my $gene_mut ( @{$clusterings{$id}} ) {
@@ -307,7 +314,7 @@ sub process {
307314
}
308315
}
309316
}
310-
##Mutation recurrence from MAF
317+
##Mutation recurrence or weight from MAF
311318
my %mutations;
312319
die "Could not open MAF file\n" unless( $fh->open( $this->{'maf_file'} , "r" ) );
313320
print STDOUT "\nReading in MAF ...\n";
@@ -334,22 +341,32 @@ sub process {
334341
$mafcols{"Tumor_Sample_Barcode"},
335342
$mafcols{$this->{"transcript_id_header"}},
336343
$mafcols{$this->{"amino_acid_header"}} );
344+
if ( $this->{'vertex_type'} eq $WEIGHT ) {
345+
unless( defined( $mafcols{$this->{"weight_header"}} ) ) { die "\n"; };
346+
push @mafcols , $mafcols{$this->{"weight_header"}};
347+
}
337348
map {
338349
chomp;
339350
my @line = split /\t/;
340351
if ( $#line >= $mafcols[-1] && $#line >= $mafcols[-2] ) { #makes sure custom maf cols are in range
341-
my ( $gene , $chr , $start , $stop , $reference , $tumorAllele , $barID , $transcript_name , $aachange ) = @line[@mafcols];
352+
my ( $gene , $chr , $start , $stop , $reference , $tumorAllele , $barID , $transcript_name , $aachange );
353+
my $weight = 1;
354+
if ( $this->{'vertex_type'} eq $WEIGHT ) {
355+
( $gene , $chr , $start , $stop , $reference , $tumorAllele , $barID , $transcript_name , $aachange , $weight ) = @line[@mafcols];
356+
} else {
357+
( $gene , $chr , $start , $stop , $reference , $tumorAllele , $barID , $transcript_name , $aachange ) = @line[@mafcols];
358+
}
342359
my $variant = join( "_" , ( $gene , $aachange , $chr , $start , $stop ) );
343360
if ( exists $variants_from_pairs{$variant} ) {
344361
my $gene_aachange = $gene.":".$aachange;
345362
if ( exists $Variants{$gene_aachange} ) {
346363
if ( not exists $mutations{$variant}{$barID} ) {
347-
$Variants{$gene_aachange}++;
348-
$mutations{$variant}{$barID}++;
364+
$Variants{$gene_aachange} += $weight;
365+
$mutations{$variant}{$barID} += $weight;
349366
}
350367
} else {
351-
$Variants{$gene_aachange} = 1;
352-
$mutations{$variant}{$barID} = 1;
368+
$Variants{$gene_aachange} = $weight;
369+
$mutations{$variant}{$barID} = $weight;
353370
}
354371
}
355372
}
@@ -425,7 +442,7 @@ sub centroid{
425442
my $C = 0;
426443
foreach $other ( keys %{$dist{$current}}) {
427444
$weight = 1; #stays as 1 if vertex_type eq 'unique'
428-
if ( $this->{'vertex_type'} ne 'unique' ) {
445+
if ( $this->{'vertex_type'} ne $UNIQUE ) {
429446
if ( exists $Variants->{$other} ) {
430447
$weight = $Variants->{$other};
431448
}
@@ -435,13 +452,15 @@ sub centroid{
435452
$C += $weight/( 2**$dist{$current}{$other} );
436453
$count+=1;
437454
#print "$current\t$other\t$dist{$current}{$other}\n";
438-
}
439-
else{
455+
} else{
440456
$recluster=1;
441457
}
442-
}
443-
else {
444-
$C += $weight -1;
458+
} else { #current is same as other
459+
if ( $this->{'vertex_type'} eq $WEIGHT ) {
460+
$C += $weight;
461+
} else {
462+
$C += $weight -1;
463+
}
445464
}
446465
$centrality{$clus_num}{$current} = $C;
447466
if ( $C > $max ) {
@@ -605,10 +624,11 @@ Usage: hotspot3d cluster [options]
605624
--p-value-cutoff P_value cutoff (<), default: 0.05
606625
--linear-cutoff Linear distance cutoff (> peptides), default: 20
607626
--max-radius Maximum cluster radius (max network geodesic from centroid, <= Angstroms), default: 10
608-
--vertex-type Graph vertex type (recurrence or unique), default: recurrence
627+
--vertex-type Graph vertex type (recurrence, unique, or weight), default: recurrence
609628
--maf-file MAF file used in proximity search step (used if vertex-type = recurrence)
610629
--transcript-id-header MAF file column header for transcript id's, default: transcript_name
611630
--amino-acid-header MAF file column header for amino acid changes, default: amino_acid_change
631+
--weight-header MAF file column header for mutation weight, default: weight (used if vertex-type = weight)
612632
613633
--help this message
614634

0 commit comments

Comments
 (0)