@@ -25,9 +25,10 @@ use FileHandle;
2525sub new {
2626 my $class = shift ;
2727 my $this = {};
28- $this -> {' clusters_file' } = ' 3D_Proximity.pairwise.singleprotein.collapsed. clusters' ;
28+ $this -> {' clusters_file' } = ' 3D_Proximity.pairwise.clusters' ;
2929 $this -> {' output_prefix' } = undef ;
3030 $this -> {mutationmass } = {};
31+ $this -> {recurrencemass } = {};
3132 $this -> {drugmass } = {};
3233 $this -> {degrees } = {};
3334 $this -> {centralities } = {};
@@ -41,6 +42,13 @@ sub new {
4142
# Entry point for the summary step: parse the command-line options, read the
# clusters file into the per-cluster tallies, then write the summary table.
# The three stages run strictly in this order; the value of the last call is
# what the caller receives.
sub process {
    my ( $self ) = @_;
    $self->setOptions();
    $self->readClustersFile();
    $self->writeSummary();
}
49+
50+ sub setOptions {
51+ my ( $this ) = shift ;
4452 my ( $help , $options );
4553 unless ( @ARGV ) { die $this -> help_text(); }
4654 $options = GetOptions (
@@ -52,24 +60,25 @@ sub process {
5260 unless ( $options ) { die $this -> help_text(); }
5361 unless ( $this -> {' clusters_file' } ) { warn ' You must provide a clusters file! ' , " \n " ; die $this -> help_text(); }
5462 unless ( -e $this -> {' clusters_file' } ) { warn " The input clusters file (" .$this -> {' clusters_file' }." ) does not exist! " , " \n " ; die $this -> help_text(); }
63+ return ;
64+ }
5565
56- my ( $genes , $degrees , $centralities , $geodesics , $clustertotal , $centroids ) = {};
57- my ( $mutationmass , $drugmass ) = {};
58- # my ( $DrugBank , $NIH ) = {};
59- # my ( $DMKB , $DMKBs ) = {};
60- # my ( $family , $domains ) = {};
61-
62- my $infh = new FileHandle;
63- unless ( $infh -> open ( $this -> {' clusters_file' } , " r" ) ) { die " Could not open clusters file $! \n " };
64- my $fh = new FileHandle;
# Build the path of the summary file to write.
# Uses the configured output prefix when one is defined and falls back to the
# clusters file path otherwise; the summary suffix is appended in both cases.
# Returns the resulting file name as a string.
sub generateOutputFileName {
    my $self = shift;
    my $name = defined $self->{' output_prefix' }
        ? $self->{' output_prefix' }
        : $self->{' clusters_file' };
    # Append with .= (not a fresh concatenation) to mirror the original statement.
    $name .= " .summary";
    return $name;
}
77+
78+ sub readClustersFile {
79+ my ( $this ) = @_ ;
80+ my $infh = new FileHandle;
81+ unless ( $infh -> open ( $this -> {' clusters_file' } , " r" ) ) { die " Could not open clusters file $! \n " };
7382 my @cols ;
7483 while ( my $line = <$infh > ) {
7584 chomp ( $line );
@@ -93,7 +102,7 @@ sub process {
93102 $cols {" Geodesic_From_Centroid" } ,
94103 $cols {" Recurrence" } );
95104 } else {
96- my ( $id , $genedrug , $aagene , $degree , $centrality , $geodesic , $recurrence ) = (split ( " \t " , $line ))[0..6 ];
105+ my ( $id , $genedrug , $aagene , $degree , $centrality , $geodesic , $recurrence ) = (split ( " \t " , $line ))[@cols ];
97106 $this -> sum( ' degrees' , $id , $degree );
98107 $this -> sum( ' centralities' , $id , $centrality );
99108 $this -> sum( ' geodesics' , $id , $geodesic );
@@ -102,7 +111,7 @@ sub process {
102111 if ( $aagene =~ / p\. / ) {
103112 if ( $geodesic == 0 ) { $this -> {centroids }-> {$id } = $genedrug ." :" .$aagene ; }
104113 $this -> sum( ' mutationmass' , $id , 1 );
105- $this -> sum( ' clustertotal ' , $id , $recurrence );
114+ $this -> sum( ' recurrencemass ' , $id , $recurrence );
106115 # if ( $families ) { &sumlist( $family , $id , $families , $recurrence ); }
107116 # if ( $doms ) { &sumlist( $domains , $id , $doms , $recurrence ); }
108117 # $this->list( 'family' , $id , $families );
@@ -122,9 +131,16 @@ sub process {
122131 }
123132 }
124133 $infh -> close ();
134+ return ;
135+ }
125136
137+ sub writeSummary {
138+ my ( $this ) = @_ ;
139+ my $outFilename = $this -> generateOutputFileName();
140+ my $fh = new FileHandle;
141+ unless ( $fh -> open ( $outFilename , " w" ) ) { die " Could not open $outFilename $! \n " ; }
126142 my $fill = " %.3f" ." \t " ;
127- $fh -> print ( " Cluster_ID\t Centroid\t Avg_Degree\t Centrality\t Avg_Geodesic\t Avg_Recurrence" );
143+ $fh -> print ( " Cluster_ID\t Centroid\t Avg_Degree\t Centrality\t Avg_Centrality \ t Avg_Geodesic\t Recurrence_Mass \t Avg_Recurrence" );
128144 $fh -> print ( " \t Mutations_(Unique_AAchanges)" );
129145 # $fh->print( "\tKnown_Mutations_(Unique_Known)" );
130146 $fh -> print ( " \t Total_Drugs\t Genes_Drugs" );
@@ -140,9 +156,11 @@ sub process {
140156 }
141157 $fh -> printf ( $fill , $this -> avg( ' degrees' , $id , ' mutationmass' ) ); # AVG_Degree (pairs)
142158 $fh -> printf ( $fill , $this -> {centralities }-> {$id } ); # Centrality (cluster closeness)
159+ $fh -> printf ( $fill , $this -> avg( ' centralities' , $id , ' recurrencemass' ) ); # Avg_Frequency (average recurrence)
143160 $fh -> printf ( $fill , $this -> avg( ' geodesics' , $id , ' mutationmass' ) ); # Avg_Geodesic (average geodesic from centroid)
144- $fh -> printf ( $fill , $this -> avg( ' clustertotal' , $id , ' mutationmass' ) ); # Avg_Frequency (average recurrence)
145- $fh -> print ( $this -> {clustertotal }-> {$id }." (" .$this -> {mutationmass }-> {$id }." )\t " ); # Mutations_(Unique_AAchanges)
161+ $fh -> printf ( $fill , $this -> {recurrencemass }-> {$id } ); # Recurrence_Mass (sum recurrence in cluster)
162+ $fh -> printf ( $fill , $this -> avg( ' recurrencemass' , $id , ' mutationmass' ) ); # Avg_Frequency (average recurrence)
163+ $fh -> print ( $this -> {recurrencemass }-> {$id }." (" .$this -> {mutationmass }-> {$id }." )\t " ); # Mutations_(Unique_AAchanges)
146164 # $fh->print( $DMKB->{$id}." (".(scalar keys %{$DMKBs->{$id}}).")\t" ); #Known_Mutations_(Unique_Known)#known druggable
147165 if ( exists $this -> {drugmass }-> {$id } ) {
148166 $fh -> print ( $this -> {drugmass }-> {$id }." \t " );
@@ -177,6 +195,7 @@ sub process {
177195 $fh -> print ( " \n " );
178196 }
179197 $fh -> close ();
198+ return ;
180199}
181200
182201sub sum {
0 commit comments