Add energy per token metric #11

Open · wants to merge 1 commit into main
README.md: 6 changes (3 additions & 3 deletions)

@@ -67,12 +67,12 @@ The output image `report.png` contains the following:

## Explanation of Recommendations

-The recommended settings are based on the lowest energy consumption (Watt-min) for each scenario. Energy consumption is calculated as the product of power draw and total time taken.
+The recommended settings are based on the lowest energy consumption (Watt-min) for each scenario. Energy consumption is calculated as the product of power draw and total time taken. The scripts also record **energy per token**, calculated as the instantaneous power draw divided by the token generation rate.

## File Descriptions

-- `training_stats.csv`: Contains columns such as `max_watt`, `tokens_per_sec`, `temperature`, `gpu_utilization`, `memory_utilization`, `loss`, and `timestamp`.
-- `inference_stats.csv`: Contains columns such as `max_watt`, `tokens_per_sec`, `temperature`, `gpu_utilization`, `memory_utilization`, and `timestamp`.
+- `training_stats.csv`: Contains columns such as `max_watt`, `tokens_per_sec`, `total_power_draw`, `energy_per_token`, `temperature`, `gpu_utilization`, `memory_utilization`, `loss`, and `timestamp`.
+- `inference_stats.csv`: Contains columns such as `max_watt`, `tokens_per_sec`, `total_power_draw`, `energy_per_token`, `temperature`, `gpu_utilization`, `memory_utilization`, and `timestamp`.
- `generate_report.py`:
  - Loads data from CSV files.
  - Cleans data by removing outliers.
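
For intuition, the metric described above works out to watt-seconds (joules) per token. A quick worked example with made-up numbers (illustrative only, not taken from the patch or the logs):

```python
# Illustrative sketch of the energy-per-token definition in this PR.
# Example values are assumptions, not measurements.
total_power_draw = 300.0  # instantaneous power draw across GPUs, watts
tokens_per_sec = 1200.0   # measured token generation rate

# W / (tokens/s) = W*s/token, i.e. joules per token
energy_per_token = total_power_draw / tokens_per_sec
print(energy_per_token)  # 0.25 W*s per token
```
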
generate_report.py: 33 changes (26 additions & 7 deletions)

@@ -14,6 +14,16 @@
training_stats['timestamp'] = pd.to_datetime(training_stats['timestamp'])
inference_stats['timestamp'] = pd.to_datetime(inference_stats['timestamp'])

+# Derive energy_per_token if missing
+if 'energy_per_token' not in inference_stats.columns:
+    inference_stats['energy_per_token'] = (
+        inference_stats['total_power_draw'] / inference_stats['tokens_per_sec']
+    )
+if 'energy_per_token' not in training_stats.columns:
+    training_stats['energy_per_token'] = (
+        training_stats['total_power_draw'] / training_stats['tokens_per_sec']
+    )

# Calculate total time for each row
inference_stats['time_diff'] = inference_stats['timestamp'].diff().dt.total_seconds().fillna(0)
training_stats['time_diff'] = training_stats['timestamp'].diff().dt.total_seconds().fillna(0)
@@ -30,7 +40,8 @@ def remove_outliers(df, column):
    'temperature': 'mean',
    'gpu_utilization': 'mean',
    'memory_utilization': 'mean',
-   'time_diff': 'sum'
+   'time_diff': 'sum',
+   'energy_per_token': 'mean'
}).reset_index()

# Training Metrics
@@ -41,7 +52,8 @@ def remove_outliers(df, column):
    'gpu_utilization': 'mean',
    'memory_utilization': 'mean',
    'loss': 'mean',
-   'time_diff': 'sum'
+   'time_diff': 'sum',
+   'energy_per_token': 'mean'
}).reset_index()

# Generate summary tables
@@ -82,7 +94,7 @@ def plot_smooth_curve(ax, x, y, title, xlabel, ylabel, highlight_x=None):
ax.grid(True)

# Plotting the updated charts without outliers
-fig, axs = plt.subplots(6, 2, figsize=(20, 30))
+fig, axs = plt.subplots(7, 2, figsize=(20, 35))

# Add header with current date and time
fig.suptitle(f'Performance Metrics and Recommendations\nGenerated on: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}', fontsize=16)
@@ -103,6 +115,9 @@ def plot_smooth_curve(ax, x, y, title, xlabel, ylabel, highlight_x=None):
plot_smooth_curve(axs[4, 0], inference_grouped['max_watt'], inference_grouped['time_diff'],
'Max Power vs. Total Time (Inference)', 'Max Power (W)', 'Total Time (seconds)', highlight_x=optimal_inference_watt)

+plot_smooth_curve(axs[5, 0], inference_grouped['max_watt'], inference_grouped['energy_per_token'],
+                  'Max Power vs. Energy per Token (Inference)', 'Max Power (W)', 'Energy per Token (W*s/token)', highlight_x=optimal_inference_watt)

# Training Metrics
plot_smooth_curve(axs[0, 1], training_grouped['max_watt'], training_grouped['tokens_per_sec'],
'Max Power vs. Tokens per Second (Training)', 'Max Power (W)', 'Tokens per Second', highlight_x=optimal_training_watt)
@@ -119,18 +134,22 @@ def plot_smooth_curve(ax, x, y, title, xlabel, ylabel, highlight_x=None):
plot_smooth_curve(axs[4, 1], training_grouped['max_watt'], training_grouped['time_diff'],
'Max Power vs. Total Time (Training)', 'Max Power (W)', 'Total Time (seconds)', highlight_x=optimal_training_watt)

+plot_smooth_curve(axs[5, 1], training_grouped['max_watt'], training_grouped['energy_per_token'],
+                  'Max Power vs. Energy per Token (Training)', 'Max Power (W)', 'Energy per Token (W*s/token)', highlight_x=optimal_training_watt)

# Summary with recommended settings
summary_text = (
    f"Recommended Settings:\n"
    f"Optimal Max Power for Training: {optimal_training_watt}W\n"
    f"Optimal Max Power for Inference: {optimal_inference_watt}W\n\n"
    "Recommendations are based on the lowest energy consumption (Watt-min) for each scenario.\n"
-   "Energy consumption is calculated as the product of power draw and total time taken."
+   "Energy consumption is calculated as the product of power draw and total time taken.\n"
+   "Energy per token is the instantaneous power divided by token generation rate."
)

-axs[5, 0].axis('off')
-axs[5, 1].text(0.5, 0.5, summary_text, ha='center', va='center', fontsize=12, wrap=True)
-axs[5, 1].axis('off')
+axs[6, 0].axis('off')
+axs[6, 1].text(0.5, 0.5, summary_text, ha='center', va='center', fontsize=12, wrap=True)
+axs[6, 1].axis('off')

plt.tight_layout(rect=[0, 0, 1, 0.96])
plt.savefig('report.png')
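
A design note on the derive-if-missing fallback above: the logging scripts below guard against a zero token rate (`total_power / tokens_per_sec if tokens_per_sec else 0`), but this CSV-side fallback divides unconditionally, so a row with `tokens_per_sec == 0` would produce `inf` and distort the aggregated means. A guarded variant might look like this (a sketch, not part of the patch):

```python
import numpy as np

# Sketch: mask zero rates so the division yields NaN (ignored by mean())
# rather than inf; the same treatment would apply to training_stats.
if 'energy_per_token' not in inference_stats.columns:
    safe_rate = inference_stats['tokens_per_sec'].replace(0, np.nan)
    inference_stats['energy_per_token'] = (
        inference_stats['total_power_draw'] / safe_rate
    )
```
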
llm_inference.py: 16 changes (13 additions & 3 deletions)

@@ -62,7 +62,9 @@ def load_across_gpus(seq_length, batch_size, model_variant, max_iterations, call
# Get sample GPU metrics to dynamically generate headers
sample_metrics = get_gpu_metrics()[0]
gpu_headers = list(sample_metrics.keys())
-headers = ['timestamp', 'tokens_per_sec'] + gpu_headers + ['max_watt']
+headers = ['timestamp', 'tokens_per_sec'] + gpu_headers + [
+    'max_watt', 'total_power_draw', 'energy_per_token'
+]

for iteration in range(max_iterations):
model.eval()
@@ -91,10 +93,18 @@ def load_across_gpus(seq_length, batch_size, model_variant, max_iterations, call
from gpu_metrics_utils import collect_power_draw_all_gpus
total_power = collect_power_draw_all_gpus()
gpu_metrics = get_gpu_metrics()[0]
-data = [timestamp, tokens_per_sec] + list(gpu_metrics.values()) + [MAX_WATT, total_power]
+energy_per_token = total_power / tokens_per_sec if tokens_per_sec else 0
+data = [
+    timestamp,
+    tokens_per_sec,
+    *list(gpu_metrics.values()),
+    MAX_WATT,
+    total_power,
+    energy_per_token,
+]
if callback:
data = callback(data)
-log_statistics(LOG_FILE, headers + ['total_power_draw'], data)
+log_statistics(LOG_FILE, headers, data)
logger.info(f"Logged statistics: {data}")

shutdown_nvml()
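
With this change, `total_power_draw` and `energy_per_token` are declared in the header list up front rather than `total_power_draw` being appended at log time, so headers and data rows are built from the same sequence. Assuming `get_gpu_metrics()` reports the fields named in the README (an assumption; the actual keys come from that helper), a row of inference_stats.csv would be laid out as:

```
timestamp,tokens_per_sec,temperature,gpu_utilization,memory_utilization,max_watt,total_power_draw,energy_per_token
```
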
llm_training.py: 20 changes (17 additions & 3 deletions)

@@ -73,7 +73,9 @@ def load_across_gpus(gpu_ids, batch_size, seq_length, epochs, learning_rate, cal
# Get sample GPU metrics to dynamically generate headers
sample_metrics = get_gpu_metrics()[0]
gpu_headers = list(sample_metrics.keys())
-headers = ['timestamp', 'epoch', 'iteration', 'batch', 'loss', 'tokens_per_sec'] + gpu_headers + ['max_watt']
+headers = ['timestamp', 'epoch', 'iteration', 'batch', 'loss', 'tokens_per_sec'] + gpu_headers + [
+    'max_watt', 'total_power_draw', 'energy_per_token'
+]

model.train()
for epoch in range(epochs):
@@ -101,10 +103,22 @@ def load_across_gpus(gpu_ids, batch_size, seq_length, epochs, learning_rate, cal
from gpu_metrics_utils import collect_power_draw_all_gpus
total_power = collect_power_draw_all_gpus()
gpu_metrics = get_gpu_metrics()[0]
-data = [timestamp, epoch + 1, iteration, i // batch_size + 1, loss.item(), tokens_per_sec] + list(gpu_metrics.values()) + [MAX_WATT, total_power]
+energy_per_token = total_power / tokens_per_sec if tokens_per_sec else 0
+data = [
+    timestamp,
+    epoch + 1,
+    iteration,
+    i // batch_size + 1,
+    loss.item(),
+    tokens_per_sec,
+    *list(gpu_metrics.values()),
+    MAX_WATT,
+    total_power,
+    energy_per_token,
+]
if callback:
data = callback(data)
-log_statistics(log_file, headers + ['total_power_draw'], data)
+log_statistics(log_file, headers, data)
logger.info(f"Logged statistics: {data}")

shutdown_nvml()
recommend.py: 9 changes (9 additions & 0 deletions)

@@ -8,6 +8,15 @@
training_stats['timestamp'] = pd.to_datetime(training_stats['timestamp'])
inference_stats['timestamp'] = pd.to_datetime(inference_stats['timestamp'])

+if 'energy_per_token' not in inference_stats.columns:
+    inference_stats['energy_per_token'] = (
+        inference_stats['total_power_draw'] / inference_stats['tokens_per_sec']
+    )
+if 'energy_per_token' not in training_stats.columns:
+    training_stats['energy_per_token'] = (
+        training_stats['total_power_draw'] / training_stats['tokens_per_sec']
+    )

# Function to calculate summary statistics
def calculate_summary(data):
numeric_data = data.select_dtypes(include='number')
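
With the derived column in place, a recommendation by this metric reduces to a groupby. A minimal sketch (assuming recommend.py, like generate_report.py, compares wattage caps against mean energy per token; the truncated `calculate_summary` above may do this differently):

```python
# Sketch: wattage cap with the lowest mean energy per token (inference side)
best_watt = (
    inference_stats.groupby('max_watt')['energy_per_token']
    .mean()
    .idxmin()
)
print(f"Lowest energy per token at max_watt = {best_watt}W")
```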