Skip to content

Commit 5823541

Browse files
shekhirin and claude
authored
feat(reth-bench): add gas throughput chart to python script (paradigmxyz#17572)
Co-authored-by: Claude <[email protected]>
1 parent a1a4f2d commit 5823541

File tree

2 files changed

+235
-21
lines changed

2 files changed

+235
-21
lines changed

.gitignore

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,3 +63,8 @@ recipe.json
6363
_
6464
# broken links report
6565
links-report.json
66+
67+
# Python cache
68+
__pycache__/
69+
*.py[cod]
70+
*$py.class

bin/reth-bench/scripts/compare_newpayload_latency.py

Lines changed: 230 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -16,32 +16,89 @@
1616
#
1717
# - A simple line graph plotting the latencies of the two files against each
1818
# other.
19+
#
20+
# - A gas per second (gas/s) chart showing throughput over time.
1921

2022

2123
import argparse
2224
import pandas as pd
2325
import matplotlib.pyplot as plt
2426
import numpy as np
2527
import sys
28+
import os
29+
from matplotlib.ticker import FuncFormatter
30+
31+
def get_output_filename(base_path, suffix=None):
    """Build an output path by inserting ``_<suffix>`` before the extension.

    Args:
        base_path: Output path as given by the caller, e.g. ``latency.png``
            or ``out/latency.png``.
        suffix: Label to append to the file stem. ``None`` means "no
            suffix" and returns ``base_path`` untouched.

    Returns:
        ``base_path`` itself when ``suffix`` is ``None``; otherwise the path
        with the same directory and extension and ``_<suffix>`` appended to
        the stem (``out/latency.png`` + ``gas`` -> ``out/latency_gas.png``).
    """
    if suffix is None:
        return base_path

    # Separate the directory from the file name, then the stem from the
    # extension, so the suffix lands right before the extension.
    directory, filename = os.path.split(base_path)
    stem, extension = os.path.splitext(filename)
    suffixed_name = f"{stem}_{suffix}{extension}"

    # A bare file name has no directory component to re-attach.
    if not directory:
        return suffixed_name
    return os.path.join(directory, suffixed_name)
44+
45+
def format_gas_units(value, pos):
    """Format a gas-per-second value with an SI-style unit prefix.

    The signature matches what ``matplotlib.ticker.FuncFormatter`` expects
    of its callback: the tick value followed by the tick position.

    Args:
        value: Throughput in gas per second.
        pos: Tick position supplied by the formatter; unused here.

    Returns:
        ``'0'`` for an exact zero. Otherwise the value scaled to the largest
        fitting unit (gas, Kgas, Mgas, Ggas, Tgas) followed by ``/s``, with
        0, 1 or 2 decimals depending on whether the scaled magnitude is
        >= 100, >= 10, or smaller. Negative values use the same precision as
        their positive counterparts.
    """
    if value == 0:
        return '0'

    # Largest unit first so the first matching threshold wins.
    units = (
        (1e12, 'Tgas'),  # Teragas
        (1e9, 'Ggas'),   # Gigagas
        (1e6, 'Mgas'),   # Megagas
        (1e3, 'Kgas'),   # Kilogas
        (1, 'gas'),      # gas
    )

    magnitude = abs(value)
    for threshold, unit in units:
        if magnitude < threshold:
            continue

        scaled_value = value / threshold
        # Choose precision from the magnitude, not the signed value, so that
        # e.g. -250 Mgas/s is rendered like 250 Mgas/s rather than always
        # falling into the two-decimal branch.
        scaled_magnitude = magnitude / threshold
        if scaled_magnitude >= 100:
            return f'{scaled_value:.0f}{unit}/s'
        if scaled_magnitude >= 10:
            return f'{scaled_value:.1f}{unit}/s'
        return f'{scaled_value:.2f}{unit}/s'

    # Non-zero magnitude below 1 gas/s (or NaN): keep two decimals so a
    # small value is not displayed as a misleading "0gas/s".
    return f'{value:.2f}gas/s'
72+
73+
def moving_average(data, window_size):
    """Smooth ``data`` with a centered rolling mean.

    Args:
        data: One-dimensional sequence of numeric samples.
        window_size: Number of samples per averaging window. Values of 1 or
            less disable smoothing.

    Returns:
        ``data`` itself (same object) when ``window_size <= 1``; otherwise
        the array of rolling means, one entry per input sample.
    """
    if window_size <= 1:
        return data

    # min_periods=1 lets the partial windows at both ends still produce a
    # mean, so the output has no NaN padding from the windowing itself.
    windows = pd.Series(data).rolling(
        window=window_size,
        center=True,
        min_periods=1,
    )
    smoothed = windows.mean()
    return smoothed.values
2681

2782
def main():
2883
parser = argparse.ArgumentParser(description='Generate histogram of total_latency percent differences between two CSV files')
2984
parser.add_argument('baseline_csv', help='First CSV file, used as the baseline/control')
3085
parser.add_argument('comparison_csv', help='Second CSV file, which is being compared to the baseline')
3186
parser.add_argument('-o', '--output', default='latency.png', help='Output image file (default: latency.png)')
32-
parser.add_argument('--graphs', default='all', help='Comma-separated list of graphs to plot: histogram, line, all (default: all)')
87+
parser.add_argument('--graphs', default='all', help='Comma-separated list of graphs to plot: histogram, line, gas, all (default: all)')
88+
parser.add_argument('--average', type=int, metavar='N', help='Apply moving average over N blocks to smooth line and gas charts')
89+
parser.add_argument('--separate', action='store_true', help='Output each chart as a separate file')
3390

3491
args = parser.parse_args()
3592

3693
# Parse graph selection
3794
if args.graphs.lower() == 'all':
38-
selected_graphs = {'histogram', 'line'}
95+
selected_graphs = {'histogram', 'line', 'gas'}
3996
else:
4097
selected_graphs = set(graph.strip().lower() for graph in args.graphs.split(','))
41-
valid_graphs = {'histogram', 'line'}
98+
valid_graphs = {'histogram', 'line', 'gas'}
4299
invalid_graphs = selected_graphs - valid_graphs
43100
if invalid_graphs:
44-
print(f"Error: Invalid graph types: {', '.join(invalid_graphs)}. Valid options are: histogram, line, all", file=sys.stderr)
101+
print(f"Error: Invalid graph types: {', '.join(invalid_graphs)}. Valid options are: histogram, line, gas, all", file=sys.stderr)
45102
sys.exit(1)
46103

47104
try:
@@ -62,6 +119,15 @@ def main():
62119
print(f"Error: 'total_latency' column not found in {args.comparison_csv}", file=sys.stderr)
63120
sys.exit(1)
64121

122+
# Check for gas_used column if gas graph is selected
123+
if 'gas' in selected_graphs:
124+
if 'gas_used' not in df1.columns:
125+
print(f"Error: 'gas_used' column not found in {args.baseline_csv} (required for gas graph)", file=sys.stderr)
126+
sys.exit(1)
127+
if 'gas_used' not in df2.columns:
128+
print(f"Error: 'gas_used' column not found in {args.comparison_csv} (required for gas graph)", file=sys.stderr)
129+
sys.exit(1)
130+
65131
if len(df1) != len(df2):
66132
print("Warning: CSV files have different number of rows. Using minimum length.", file=sys.stderr)
67133
min_len = min(len(df1), len(df2))
@@ -93,23 +159,35 @@ def main():
93159
print("Error: No valid graphs selected", file=sys.stderr)
94160
sys.exit(1)
95161

96-
if num_plots == 1:
97-
fig, ax = plt.subplots(1, 1, figsize=(12, 6))
98-
axes = [ax]
162+
# Store output filenames
163+
output_files = []
164+
165+
if args.separate:
166+
# We'll create individual figures for each graph
167+
pass
99168
else:
100-
fig, axes = plt.subplots(num_plots, 1, figsize=(12, 6 * num_plots))
169+
# Create combined figure
170+
if num_plots == 1:
171+
fig, ax = plt.subplots(1, 1, figsize=(12, 6))
172+
axes = [ax]
173+
else:
174+
fig, axes = plt.subplots(num_plots, 1, figsize=(12, 6 * num_plots))
101175

102176
plot_idx = 0
103177

104178
# Plot histogram if selected
105179
if 'histogram' in selected_graphs:
180+
if args.separate:
181+
fig, ax = plt.subplots(1, 1, figsize=(12, 6))
182+
else:
183+
ax = axes[plot_idx]
184+
106185
min_diff = np.floor(percent_diff.min())
107186
max_diff = np.ceil(percent_diff.max())
108187

109188
# Create histogram with 1% buckets
110189
bins = np.arange(min_diff, max_diff + 1, 1)
111190

112-
ax = axes[plot_idx]
113191
ax.hist(percent_diff, bins=bins, edgecolor='black', alpha=0.7)
114192
ax.set_xlabel('Percent Difference (%)')
115193
ax.set_ylabel('Number of Blocks')
@@ -120,47 +198,169 @@ def main():
120198
ax.axvline(mean_diff, color='red', linestyle='--', label=f'Mean: {mean_diff:.2f}%')
121199
ax.axvline(median_diff, color='orange', linestyle='--', label=f'Median: {median_diff:.2f}%')
122200
ax.legend()
123-
plot_idx += 1
201+
202+
if args.separate:
203+
plt.tight_layout()
204+
output_file = get_output_filename(args.output, 'histogram')
205+
plt.savefig(output_file, dpi=300, bbox_inches='tight')
206+
output_files.append(output_file)
207+
plt.close(fig)
208+
else:
209+
plot_idx += 1
124210

125211
# Plot line graph if selected
126212
if 'line' in selected_graphs:
213+
if args.separate:
214+
fig, ax = plt.subplots(1, 1, figsize=(12, 6))
215+
else:
216+
ax = axes[plot_idx]
217+
127218
# Determine comparison color based on median change. The median being
128219
# negative means processing time got faster, so that becomes green.
129220
comparison_color = 'green' if median_diff < 0 else 'red'
130221

131-
ax = axes[plot_idx]
222+
# Apply moving average if requested
223+
plot_latency1 = latency1[:len(percent_diff)]
224+
plot_latency2 = latency2[:len(percent_diff)]
225+
226+
if args.average:
227+
plot_latency1 = moving_average(plot_latency1, args.average)
228+
plot_latency2 = moving_average(plot_latency2, args.average)
132229
if 'block_number' in df1.columns and 'block_number' in df2.columns:
133230
block_numbers = df1['block_number'].values[:len(percent_diff)]
134-
ax.plot(block_numbers, latency1[:len(percent_diff)], 'orange', alpha=0.7, label=f'Baseline ({args.baseline_csv})')
135-
ax.plot(block_numbers, latency2[:len(percent_diff)], comparison_color, alpha=0.7, label=f'Comparison ({args.comparison_csv})')
231+
ax.plot(block_numbers, plot_latency1, 'orange', alpha=0.7, label=f'Baseline ({args.baseline_csv})')
232+
ax.plot(block_numbers, plot_latency2, comparison_color, alpha=0.7, label=f'Comparison ({args.comparison_csv})')
136233
ax.set_xlabel('Block Number')
137234
ax.set_ylabel('Total Latency (ms)')
138-
ax.set_title('Total Latency vs Block Number')
235+
title = 'Total Latency vs Block Number'
236+
if args.average:
237+
title += f' ({args.average}-block moving average)'
238+
ax.set_title(title)
139239
ax.grid(True, alpha=0.3)
140240
ax.legend()
141241
else:
142242
# If no block_number column, use index
143243
indices = np.arange(len(percent_diff))
144-
ax.plot(indices, latency1[:len(percent_diff)], 'orange', alpha=0.7, label=f'Baseline ({args.baseline_csv})')
145-
ax.plot(indices, latency2[:len(percent_diff)], comparison_color, alpha=0.7, label=f'Comparison ({args.comparison_csv})')
244+
ax.plot(indices, plot_latency1, 'orange', alpha=0.7, label=f'Baseline ({args.baseline_csv})')
245+
ax.plot(indices, plot_latency2, comparison_color, alpha=0.7, label=f'Comparison ({args.comparison_csv})')
146246
ax.set_xlabel('Block Index')
147247
ax.set_ylabel('Total Latency (ms)')
148-
ax.set_title('Total Latency vs Block Index')
248+
title = 'Total Latency vs Block Index'
249+
if args.average:
250+
title += f' ({args.average}-block moving average)'
251+
ax.set_title(title)
149252
ax.grid(True, alpha=0.3)
150253
ax.legend()
151-
plot_idx += 1
254+
255+
if args.separate:
256+
plt.tight_layout()
257+
output_file = get_output_filename(args.output, 'line')
258+
plt.savefig(output_file, dpi=300, bbox_inches='tight')
259+
output_files.append(output_file)
260+
plt.close(fig)
261+
else:
262+
plot_idx += 1
152263

153-
plt.tight_layout()
154-
plt.savefig(args.output, dpi=300, bbox_inches='tight')
264+
# Plot gas/s graph if selected
265+
if 'gas' in selected_graphs:
266+
if args.separate:
267+
fig, ax = plt.subplots(1, 1, figsize=(12, 6))
268+
else:
269+
ax = axes[plot_idx]
270+
271+
# Calculate gas per second (gas/s)
272+
# latency is in microseconds, so convert to seconds for gas/s calculation
273+
gas1 = df1['gas_used'].values[:len(percent_diff)]
274+
gas2 = df2['gas_used'].values[:len(percent_diff)]
275+
276+
# Convert latency from microseconds to seconds
277+
latency1_sec = df1['total_latency'].values[:len(percent_diff)] / 1_000_000.0
278+
latency2_sec = df2['total_latency'].values[:len(percent_diff)] / 1_000_000.0
279+
280+
# Calculate gas per second
281+
gas_per_sec1 = gas1 / latency1_sec
282+
gas_per_sec2 = gas2 / latency2_sec
283+
284+
# Store original values for statistics before averaging
285+
original_gas_per_sec1 = gas_per_sec1.copy()
286+
original_gas_per_sec2 = gas_per_sec2.copy()
287+
288+
# Apply moving average if requested
289+
if args.average:
290+
gas_per_sec1 = moving_average(gas_per_sec1, args.average)
291+
gas_per_sec2 = moving_average(gas_per_sec2, args.average)
292+
293+
# Calculate median gas/s for color determination (use original values)
294+
median_gas_per_sec1 = np.median(original_gas_per_sec1)
295+
median_gas_per_sec2 = np.median(original_gas_per_sec2)
296+
comparison_color = 'green' if median_gas_per_sec2 > median_gas_per_sec1 else 'red'
297+
298+
if 'block_number' in df1.columns and 'block_number' in df2.columns:
299+
block_numbers = df1['block_number'].values[:len(percent_diff)]
300+
ax.plot(block_numbers, gas_per_sec1, 'orange', alpha=0.7, label=f'Baseline ({args.baseline_csv})')
301+
ax.plot(block_numbers, gas_per_sec2, comparison_color, alpha=0.7, label=f'Comparison ({args.comparison_csv})')
302+
ax.set_xlabel('Block Number')
303+
ax.set_ylabel('Gas Throughput')
304+
title = 'Gas Throughput vs Block Number'
305+
if args.average:
306+
title += f' ({args.average}-block moving average)'
307+
ax.set_title(title)
308+
ax.grid(True, alpha=0.3)
309+
ax.legend()
310+
311+
# Format Y-axis with gas units
312+
formatter = FuncFormatter(format_gas_units)
313+
ax.yaxis.set_major_formatter(formatter)
314+
else:
315+
# If no block_number column, use index
316+
indices = np.arange(len(percent_diff))
317+
ax.plot(indices, gas_per_sec1, 'orange', alpha=0.7, label=f'Baseline ({args.baseline_csv})')
318+
ax.plot(indices, gas_per_sec2, comparison_color, alpha=0.7, label=f'Comparison ({args.comparison_csv})')
319+
ax.set_xlabel('Block Index')
320+
ax.set_ylabel('Gas Throughput')
321+
title = 'Gas Throughput vs Block Index'
322+
if args.average:
323+
title += f' ({args.average}-block moving average)'
324+
ax.set_title(title)
325+
ax.grid(True, alpha=0.3)
326+
ax.legend()
327+
328+
# Format Y-axis with gas units
329+
formatter = FuncFormatter(format_gas_units)
330+
ax.yaxis.set_major_formatter(formatter)
331+
332+
if args.separate:
333+
plt.tight_layout()
334+
output_file = get_output_filename(args.output, 'gas')
335+
plt.savefig(output_file, dpi=300, bbox_inches='tight')
336+
output_files.append(output_file)
337+
plt.close(fig)
338+
else:
339+
plot_idx += 1
340+
341+
# Save combined figure if not using separate files
342+
if not args.separate:
343+
plt.tight_layout()
344+
plt.savefig(args.output, dpi=300, bbox_inches='tight')
345+
output_files.append(args.output)
155346

156347
# Create graph type description for output message
157348
graph_types = []
158349
if 'histogram' in selected_graphs:
159350
graph_types.append('histogram')
160351
if 'line' in selected_graphs:
161352
graph_types.append('latency graph')
353+
if 'gas' in selected_graphs:
354+
graph_types.append('gas/s graph')
162355
graph_desc = ' and '.join(graph_types)
163-
print(f"{graph_desc.capitalize()} saved to {args.output}")
356+
357+
# Print output file(s) information
358+
if args.separate:
359+
print(f"Saved {len(output_files)} separate files:")
360+
for output_file in output_files:
361+
print(f" - {output_file}")
362+
else:
363+
print(f"{graph_desc.capitalize()} saved to {args.output}")
164364

165365
# Always print statistics
166366
print(f"\nStatistics:")
@@ -170,6 +370,15 @@ def main():
170370
print(f"Min: {percent_diff.min():.2f}%")
171371
print(f"Max: {percent_diff.max():.2f}%")
172372
print(f"Total blocks analyzed: {len(percent_diff)}")
373+
374+
# Print gas/s statistics if gas data is available
375+
if 'gas' in selected_graphs:
376+
# Use original values for statistics (not averaged)
377+
print(f"\nGas/s Statistics:")
378+
print(f"Baseline median gas/s: {median_gas_per_sec1:,.0f}")
379+
print(f"Comparison median gas/s: {median_gas_per_sec2:,.0f}")
380+
gas_diff_percent = ((median_gas_per_sec2 - median_gas_per_sec1) / median_gas_per_sec1) * 100
381+
print(f"Gas/s percent change: {gas_diff_percent:+.2f}%")
173382

174383
if __name__ == '__main__':
175384
main()

0 commit comments

Comments
 (0)