1616#
1717# - A simple line graph plotting the latencies of the two files against each
1818# other.
19+ #
20+ # - A gas per second (gas/s) chart showing throughput over time.
1921
2022
2123import argparse
2224import pandas as pd
2325import matplotlib .pyplot as plt
2426import numpy as np
2527import sys
28+ import os
29+ from matplotlib .ticker import FuncFormatter
30+
def get_output_filename(base_path, suffix=None):
    """Return ``base_path`` with ``_<suffix>`` inserted before the extension.

    Used to derive one output file per chart from a single ``--output``
    value, e.g. ``latency.png`` -> ``latency_gas.png``.

    Args:
        base_path: Output path as given by the user; may include a directory
            component and may or may not have an extension.
        suffix: Text appended to the file stem after an underscore. When
            ``None`` the path is returned unchanged.

    Returns:
        The derived path as a string. The directory component and the
        extension of ``base_path`` are preserved.
    """
    if suffix is None:
        return base_path

    # Separate the directory from the file name so that only the stem of the
    # file name is modified.
    dir_name, base_name = os.path.split(base_path)
    stem, ext = os.path.splitext(base_name)

    # os.path.join returns the second argument untouched when dir_name is
    # empty, so no special case is needed for bare file names.
    return os.path.join(dir_name, f"{stem}_{suffix}{ext}")
44+
def format_gas_units(value, pos):
    """Format a gas-per-second value with a scaled unit label.

    The signature matches what ``matplotlib.ticker.FuncFormatter`` expects,
    so the function can be installed directly as an axis tick formatter.

    Args:
        value: Tick value in gas per second.
        pos: Tick position supplied by matplotlib; unused.

    Returns:
        ``'0'`` for a zero value, otherwise a string such as
        ``'1.50 Mgas/s'``. The unit is the largest of Tgas, Ggas, Mgas, Kgas
        or gas whose threshold does not exceed ``abs(value)``; values smaller
        than 1 in magnitude use plain ``gas``. Roughly three significant
        digits are shown: no decimals from 100 upward, one decimal from 10
        upward, two decimals below that.
    """
    if value == 0:
        return '0'

    # Ordered from largest to smallest so the first match is the best fit.
    units = (
        (1e12, 'Tgas'),  # Teragas
        (1e9, 'Ggas'),   # Gigagas
        (1e6, 'Mgas'),   # Megagas
        (1e3, 'Kgas'),   # Kilogas
        (1, 'gas'),      # gas
    )

    magnitude = abs(value)
    # Fall back to the base unit when nothing matches (|value| < 1 or NaN).
    threshold, unit = next(
        ((limit, label) for limit, label in units if magnitude >= limit),
        units[-1],
    )

    scaled_value = value / threshold
    # Choose the precision from the magnitude so that negative values get the
    # same number of decimals as their positive counterparts.
    scaled_magnitude = abs(scaled_value)
    if scaled_magnitude >= 100:
        precision = 0
    elif scaled_magnitude >= 10:
        precision = 1
    else:
        precision = 2

    return f'{scaled_value:.{precision}f} {unit}/s'
72+
def moving_average(data, window_size):
    """Smooth ``data`` with a centered moving average.

    Args:
        data: One-dimensional sequence or array of numeric samples.
        window_size: Number of samples per averaging window. ``None`` or any
            value of 1 or less disables smoothing.

    Returns:
        ``data`` itself (the same object) when smoothing is disabled,
        otherwise a NumPy array of the same length holding the rolling mean.
        Windows are centered on each sample and shrink at the edges
        (``min_periods=1``), so the result contains no NaN padding.
    """
    # A missing window (e.g. an unset optional CLI argument) means "no
    # smoothing" rather than an error from comparing None with an int.
    if window_size is None or window_size <= 1:
        return data

    # Use pandas for efficient rolling mean calculation.
    rolling_mean = pd.Series(data).rolling(
        window=window_size, center=True, min_periods=1
    ).mean()
    return rolling_mean.to_numpy()
2681
2782def main ():
2883 parser = argparse .ArgumentParser (description = 'Generate histogram of total_latency percent differences between two CSV files' )
2984 parser .add_argument ('baseline_csv' , help = 'First CSV file, used as the baseline/control' )
3085 parser .add_argument ('comparison_csv' , help = 'Second CSV file, which is being compared to the baseline' )
3186 parser .add_argument ('-o' , '--output' , default = 'latency.png' , help = 'Output image file (default: latency.png)' )
32- parser .add_argument ('--graphs' , default = 'all' , help = 'Comma-separated list of graphs to plot: histogram, line, all (default: all)' )
87+ parser .add_argument ('--graphs' , default = 'all' , help = 'Comma-separated list of graphs to plot: histogram, line, gas, all (default: all)' )
88+ parser .add_argument ('--average' , type = int , metavar = 'N' , help = 'Apply moving average over N blocks to smooth line and gas charts' )
89+ parser .add_argument ('--separate' , action = 'store_true' , help = 'Output each chart as a separate file' )
3390
3491 args = parser .parse_args ()
3592
3693 # Parse graph selection
3794 if args .graphs .lower () == 'all' :
38- selected_graphs = {'histogram' , 'line' }
95+ selected_graphs = {'histogram' , 'line' , 'gas' }
3996 else :
4097 selected_graphs = set (graph .strip ().lower () for graph in args .graphs .split (',' ))
41- valid_graphs = {'histogram' , 'line' }
98+ valid_graphs = {'histogram' , 'line' , 'gas' }
4299 invalid_graphs = selected_graphs - valid_graphs
43100 if invalid_graphs :
44- print (f"Error: Invalid graph types: { ', ' .join (invalid_graphs )} . Valid options are: histogram, line, all" , file = sys .stderr )
101+ print (f"Error: Invalid graph types: { ', ' .join (invalid_graphs )} . Valid options are: histogram, line, gas, all" , file = sys .stderr )
45102 sys .exit (1 )
46103
47104 try :
@@ -62,6 +119,15 @@ def main():
62119 print (f"Error: 'total_latency' column not found in { args .comparison_csv } " , file = sys .stderr )
63120 sys .exit (1 )
64121
122+ # Check for gas_used column if gas graph is selected
123+ if 'gas' in selected_graphs :
124+ if 'gas_used' not in df1 .columns :
125+ print (f"Error: 'gas_used' column not found in { args .baseline_csv } (required for gas graph)" , file = sys .stderr )
126+ sys .exit (1 )
127+ if 'gas_used' not in df2 .columns :
128+ print (f"Error: 'gas_used' column not found in { args .comparison_csv } (required for gas graph)" , file = sys .stderr )
129+ sys .exit (1 )
130+
65131 if len (df1 ) != len (df2 ):
66132 print ("Warning: CSV files have different number of rows. Using minimum length." , file = sys .stderr )
67133 min_len = min (len (df1 ), len (df2 ))
@@ -93,23 +159,35 @@ def main():
93159 print ("Error: No valid graphs selected" , file = sys .stderr )
94160 sys .exit (1 )
95161
96- if num_plots == 1 :
97- fig , ax = plt .subplots (1 , 1 , figsize = (12 , 6 ))
98- axes = [ax ]
162+ # Store output filenames
163+ output_files = []
164+
165+ if args .separate :
166+ # We'll create individual figures for each graph
167+ pass
99168 else :
100- fig , axes = plt .subplots (num_plots , 1 , figsize = (12 , 6 * num_plots ))
169+ # Create combined figure
170+ if num_plots == 1 :
171+ fig , ax = plt .subplots (1 , 1 , figsize = (12 , 6 ))
172+ axes = [ax ]
173+ else :
174+ fig , axes = plt .subplots (num_plots , 1 , figsize = (12 , 6 * num_plots ))
101175
102176 plot_idx = 0
103177
104178 # Plot histogram if selected
105179 if 'histogram' in selected_graphs :
180+ if args .separate :
181+ fig , ax = plt .subplots (1 , 1 , figsize = (12 , 6 ))
182+ else :
183+ ax = axes [plot_idx ]
184+
106185 min_diff = np .floor (percent_diff .min ())
107186 max_diff = np .ceil (percent_diff .max ())
108187
109188 # Create histogram with 1% buckets
110189 bins = np .arange (min_diff , max_diff + 1 , 1 )
111190
112- ax = axes [plot_idx ]
113191 ax .hist (percent_diff , bins = bins , edgecolor = 'black' , alpha = 0.7 )
114192 ax .set_xlabel ('Percent Difference (%)' )
115193 ax .set_ylabel ('Number of Blocks' )
@@ -120,47 +198,169 @@ def main():
120198 ax .axvline (mean_diff , color = 'red' , linestyle = '--' , label = f'Mean: { mean_diff :.2f} %' )
121199 ax .axvline (median_diff , color = 'orange' , linestyle = '--' , label = f'Median: { median_diff :.2f} %' )
122200 ax .legend ()
123- plot_idx += 1
201+
202+ if args .separate :
203+ plt .tight_layout ()
204+ output_file = get_output_filename (args .output , 'histogram' )
205+ plt .savefig (output_file , dpi = 300 , bbox_inches = 'tight' )
206+ output_files .append (output_file )
207+ plt .close (fig )
208+ else :
209+ plot_idx += 1
124210
125211 # Plot line graph if selected
126212 if 'line' in selected_graphs :
213+ if args .separate :
214+ fig , ax = plt .subplots (1 , 1 , figsize = (12 , 6 ))
215+ else :
216+ ax = axes [plot_idx ]
217+
127218 # Determine comparison color based on median change. The median being
128219 # negative means processing time got faster, so that becomes green.
129220 comparison_color = 'green' if median_diff < 0 else 'red'
130221
131- ax = axes [plot_idx ]
222+ # Apply moving average if requested
223+ plot_latency1 = latency1 [:len (percent_diff )]
224+ plot_latency2 = latency2 [:len (percent_diff )]
225+
226+ if args .average :
227+ plot_latency1 = moving_average (plot_latency1 , args .average )
228+ plot_latency2 = moving_average (plot_latency2 , args .average )
132229 if 'block_number' in df1 .columns and 'block_number' in df2 .columns :
133230 block_numbers = df1 ['block_number' ].values [:len (percent_diff )]
134- ax .plot (block_numbers , latency1 [: len ( percent_diff )] , 'orange' , alpha = 0.7 , label = f'Baseline ({ args .baseline_csv } )' )
135- ax .plot (block_numbers , latency2 [: len ( percent_diff )] , comparison_color , alpha = 0.7 , label = f'Comparison ({ args .comparison_csv } )' )
231+ ax .plot (block_numbers , plot_latency1 , 'orange' , alpha = 0.7 , label = f'Baseline ({ args .baseline_csv } )' )
232+ ax .plot (block_numbers , plot_latency2 , comparison_color , alpha = 0.7 , label = f'Comparison ({ args .comparison_csv } )' )
136233 ax .set_xlabel ('Block Number' )
137234 ax .set_ylabel ('Total Latency (ms)' )
138- ax .set_title ('Total Latency vs Block Number' )
235+ title = 'Total Latency vs Block Number'
236+ if args .average :
237+ title += f' ({ args .average } -block moving average)'
238+ ax .set_title (title )
139239 ax .grid (True , alpha = 0.3 )
140240 ax .legend ()
141241 else :
142242 # If no block_number column, use index
143243 indices = np .arange (len (percent_diff ))
144- ax .plot (indices , latency1 [: len ( percent_diff )] , 'orange' , alpha = 0.7 , label = f'Baseline ({ args .baseline_csv } )' )
145- ax .plot (indices , latency2 [: len ( percent_diff )] , comparison_color , alpha = 0.7 , label = f'Comparison ({ args .comparison_csv } )' )
244+ ax .plot (indices , plot_latency1 , 'orange' , alpha = 0.7 , label = f'Baseline ({ args .baseline_csv } )' )
245+ ax .plot (indices , plot_latency2 , comparison_color , alpha = 0.7 , label = f'Comparison ({ args .comparison_csv } )' )
146246 ax .set_xlabel ('Block Index' )
147247 ax .set_ylabel ('Total Latency (ms)' )
148- ax .set_title ('Total Latency vs Block Index' )
248+ title = 'Total Latency vs Block Index'
249+ if args .average :
250+ title += f' ({ args .average } -block moving average)'
251+ ax .set_title (title )
149252 ax .grid (True , alpha = 0.3 )
150253 ax .legend ()
151- plot_idx += 1
254+
255+ if args .separate :
256+ plt .tight_layout ()
257+ output_file = get_output_filename (args .output , 'line' )
258+ plt .savefig (output_file , dpi = 300 , bbox_inches = 'tight' )
259+ output_files .append (output_file )
260+ plt .close (fig )
261+ else :
262+ plot_idx += 1
152263
153- plt .tight_layout ()
154- plt .savefig (args .output , dpi = 300 , bbox_inches = 'tight' )
264+ # Plot gas/s graph if selected
265+ if 'gas' in selected_graphs :
266+ if args .separate :
267+ fig , ax = plt .subplots (1 , 1 , figsize = (12 , 6 ))
268+ else :
269+ ax = axes [plot_idx ]
270+
271+ # Calculate gas per second (gas/s)
272+ # latency is in microseconds, so convert to seconds for gas/s calculation
273+ gas1 = df1 ['gas_used' ].values [:len (percent_diff )]
274+ gas2 = df2 ['gas_used' ].values [:len (percent_diff )]
275+
276+ # Convert latency from microseconds to seconds
277+ latency1_sec = df1 ['total_latency' ].values [:len (percent_diff )] / 1_000_000.0
278+ latency2_sec = df2 ['total_latency' ].values [:len (percent_diff )] / 1_000_000.0
279+
280+ # Calculate gas per second
281+ gas_per_sec1 = gas1 / latency1_sec
282+ gas_per_sec2 = gas2 / latency2_sec
283+
284+ # Store original values for statistics before averaging
285+ original_gas_per_sec1 = gas_per_sec1 .copy ()
286+ original_gas_per_sec2 = gas_per_sec2 .copy ()
287+
288+ # Apply moving average if requested
289+ if args .average :
290+ gas_per_sec1 = moving_average (gas_per_sec1 , args .average )
291+ gas_per_sec2 = moving_average (gas_per_sec2 , args .average )
292+
293+ # Calculate median gas/s for color determination (use original values)
294+ median_gas_per_sec1 = np .median (original_gas_per_sec1 )
295+ median_gas_per_sec2 = np .median (original_gas_per_sec2 )
296+ comparison_color = 'green' if median_gas_per_sec2 > median_gas_per_sec1 else 'red'
297+
298+ if 'block_number' in df1 .columns and 'block_number' in df2 .columns :
299+ block_numbers = df1 ['block_number' ].values [:len (percent_diff )]
300+ ax .plot (block_numbers , gas_per_sec1 , 'orange' , alpha = 0.7 , label = f'Baseline ({ args .baseline_csv } )' )
301+ ax .plot (block_numbers , gas_per_sec2 , comparison_color , alpha = 0.7 , label = f'Comparison ({ args .comparison_csv } )' )
302+ ax .set_xlabel ('Block Number' )
303+ ax .set_ylabel ('Gas Throughput' )
304+ title = 'Gas Throughput vs Block Number'
305+ if args .average :
306+ title += f' ({ args .average } -block moving average)'
307+ ax .set_title (title )
308+ ax .grid (True , alpha = 0.3 )
309+ ax .legend ()
310+
311+ # Format Y-axis with gas units
312+ formatter = FuncFormatter (format_gas_units )
313+ ax .yaxis .set_major_formatter (formatter )
314+ else :
315+ # If no block_number column, use index
316+ indices = np .arange (len (percent_diff ))
317+ ax .plot (indices , gas_per_sec1 , 'orange' , alpha = 0.7 , label = f'Baseline ({ args .baseline_csv } )' )
318+ ax .plot (indices , gas_per_sec2 , comparison_color , alpha = 0.7 , label = f'Comparison ({ args .comparison_csv } )' )
319+ ax .set_xlabel ('Block Index' )
320+ ax .set_ylabel ('Gas Throughput' )
321+ title = 'Gas Throughput vs Block Index'
322+ if args .average :
323+ title += f' ({ args .average } -block moving average)'
324+ ax .set_title (title )
325+ ax .grid (True , alpha = 0.3 )
326+ ax .legend ()
327+
328+ # Format Y-axis with gas units
329+ formatter = FuncFormatter (format_gas_units )
330+ ax .yaxis .set_major_formatter (formatter )
331+
332+ if args .separate :
333+ plt .tight_layout ()
334+ output_file = get_output_filename (args .output , 'gas' )
335+ plt .savefig (output_file , dpi = 300 , bbox_inches = 'tight' )
336+ output_files .append (output_file )
337+ plt .close (fig )
338+ else :
339+ plot_idx += 1
340+
341+ # Save combined figure if not using separate files
342+ if not args .separate :
343+ plt .tight_layout ()
344+ plt .savefig (args .output , dpi = 300 , bbox_inches = 'tight' )
345+ output_files .append (args .output )
155346
156347 # Create graph type description for output message
157348 graph_types = []
158349 if 'histogram' in selected_graphs :
159350 graph_types .append ('histogram' )
160351 if 'line' in selected_graphs :
161352 graph_types .append ('latency graph' )
353+ if 'gas' in selected_graphs :
354+ graph_types .append ('gas/s graph' )
162355 graph_desc = ' and ' .join (graph_types )
163- print (f"{ graph_desc .capitalize ()} saved to { args .output } " )
356+
357+ # Print output file(s) information
358+ if args .separate :
359+ print (f"Saved { len (output_files )} separate files:" )
360+ for output_file in output_files :
361+ print (f" - { output_file } " )
362+ else :
363+ print (f"{ graph_desc .capitalize ()} saved to { args .output } " )
164364
165365 # Always print statistics
166366 print (f"\n Statistics:" )
@@ -170,6 +370,15 @@ def main():
170370 print (f"Min: { percent_diff .min ():.2f} %" )
171371 print (f"Max: { percent_diff .max ():.2f} %" )
172372 print (f"Total blocks analyzed: { len (percent_diff )} " )
373+
374+ # Print gas/s statistics if gas data is available
375+ if 'gas' in selected_graphs :
376+ # Use original values for statistics (not averaged)
377+ print (f"\n Gas/s Statistics:" )
378+ print (f"Baseline median gas/s: { median_gas_per_sec1 :,.0f} " )
379+ print (f"Comparison median gas/s: { median_gas_per_sec2 :,.0f} " )
380+ gas_diff_percent = ((median_gas_per_sec2 - median_gas_per_sec1 ) / median_gas_per_sec1 ) * 100
381+ print (f"Gas/s percent change: { gas_diff_percent :+.2f} %" )
173382
174383if __name__ == '__main__' :
175384 main ()
0 commit comments