Add energy per token metric #11

Open · wants to merge 1 commit into main
README.md: 6 changes (3 additions & 3 deletions)

@@ -67,12 +67,12 @@ The output image `report.png` contains the following:

## Explanation of Recommendations

-The recommended settings are based on the lowest energy consumption (Watt-min) for each scenario. Energy consumption is calculated as the product of power draw and total time taken.
+The recommended settings are based on the lowest energy consumption (Watt-min) for each scenario. Energy consumption is calculated as the product of power draw and total time taken. The scripts also record **energy per token**, calculated as the instantaneous power draw divided by the token generation rate.

## File Descriptions

-- `training_stats.csv`: Contains columns such as `max_watt`, `tokens_per_sec`, `temperature`, `gpu_utilization`, `memory_utilization`, `loss`, and `timestamp`.
-- `inference_stats.csv`: Contains columns such as `max_watt`, `tokens_per_sec`, `temperature`, `gpu_utilization`, `memory_utilization`, and `timestamp`.
+- `training_stats.csv`: Contains columns such as `max_watt`, `tokens_per_sec`, `total_power_draw`, `energy_per_token`, `temperature`, `gpu_utilization`, `memory_utilization`, `loss`, and `timestamp`.
+- `inference_stats.csv`: Contains columns such as `max_watt`, `tokens_per_sec`, `total_power_draw`, `energy_per_token`, `temperature`, `gpu_utilization`, `memory_utilization`, and `timestamp`.
- `generate_report.py`:
  - Loads data from CSV files.
  - Cleans data by removing outliers.
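
For intuition, the metric described above works out to watt-seconds (joules) per token. A quick worked example with made-up numbers (illustrative only, not taken from the patch or the logs):

```python
# Illustrative sketch of the energy-per-token definition in this PR.
# Example values are assumptions, not measurements.
total_power_draw = 300.0  # instantaneous power draw across GPUs, watts
tokens_per_sec = 1200.0   # measured token generation rate

# W / (tokens/s) = W*s/token, i.e. joules per token
energy_per_token = total_power_draw / tokens_per_sec
print(energy_per_token)  # 0.25 W*s per token
```
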
generate_report.py: 33 changes (26 additions & 7 deletions)

@@ -14,6 +14,16 @@
training_stats['timestamp'] = pd.to_datetime(training_stats['timestamp'])
inference_stats['timestamp'] = pd.to_datetime(inference_stats['timestamp'])

+# Derive energy_per_token if missing
+if 'energy_per_token' not in inference_stats.columns:
+    inference_stats['energy_per_token'] = (
+        inference_stats['total_power_draw'] / inference_stats['tokens_per_sec']
+    )
+if 'energy_per_token' not in training_stats.columns:
+    training_stats['energy_per_token'] = (
+        training_stats['total_power_draw'] / training_stats['tokens_per_sec']
+    )

# Calculate total time for each row
inference_stats['time_diff'] = inference_stats['timestamp'].diff().dt.total_seconds().fillna(0)
training_stats['time_diff'] = training_stats['timestamp'].diff().dt.total_seconds().fillna(0)
@@ -30,7 +40,8 @@ def remove_outliers(df, column):
    'temperature': 'mean',
    'gpu_utilization': 'mean',
    'memory_utilization': 'mean',
-   'time_diff': 'sum'
+   'time_diff': 'sum',
+   'energy_per_token': 'mean'
}).reset_index()

# Training Metrics
@@ -41,7 +52,8 @@ def remove_outliers(df, column):
    'gpu_utilization': 'mean',
    'memory_utilization': 'mean',
    'loss': 'mean',
-   'time_diff': 'sum'
+   'time_diff': 'sum',
+   'energy_per_token': 'mean'
}).reset_index()

# Generate summary tables
@@ -82,7 +94,7 @@ def plot_smooth_curve(ax, x, y, title, xlabel, ylabel, highlight_x=None):
ax.grid(True)

# Plotting the updated charts without outliers
-fig, axs = plt.subplots(6, 2, figsize=(20, 30))
+fig, axs = plt.subplots(7, 2, figsize=(20, 35))

# Add header with current date and time
fig.suptitle(f'Performance Metrics and Recommendations\nGenerated on: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}', fontsize=16)
@@ -103,6 +115,9 @@ def plot_smooth_curve(ax, x, y, title, xlabel, ylabel, highlight_x=None):
plot_smooth_curve(axs[4, 0], inference_grouped['max_watt'], inference_grouped['time_diff'],
'Max Power vs. Total Time (Inference)', 'Max Power (W)', 'Total Time (seconds)', highlight_x=optimal_inference_watt)

+plot_smooth_curve(axs[5, 0], inference_grouped['max_watt'], inference_grouped['energy_per_token'],
+                  'Max Power vs. Energy per Token (Inference)', 'Max Power (W)', 'Energy per Token (W*s/token)', highlight_x=optimal_inference_watt)

# Training Metrics
plot_smooth_curve(axs[0, 1], training_grouped['max_watt'], training_grouped['tokens_per_sec'],
'Max Power vs. Tokens per Second (Training)', 'Max Power (W)', 'Tokens per Second', highlight_x=optimal_training_watt)
@@ -119,18 +134,22 @@ def plot_smooth_curve(ax, x, y, title, xlabel, ylabel, highlight_x=None):
plot_smooth_curve(axs[4, 1], training_grouped['max_watt'], training_grouped['time_diff'],
'Max Power vs. Total Time (Training)', 'Max Power (W)', 'Total Time (seconds)', highlight_x=optimal_training_watt)

+plot_smooth_curve(axs[5, 1], training_grouped['max_watt'], training_grouped['energy_per_token'],
+                  'Max Power vs. Energy per Token (Training)', 'Max Power (W)', 'Energy per Token (W*s/token)', highlight_x=optimal_training_watt)

# Summary with recommended settings
summary_text = (
    f"Recommended Settings:\n"
    f"Optimal Max Power for Training: {optimal_training_watt}W\n"
    f"Optimal Max Power for Inference: {optimal_inference_watt}W\n\n"
    "Recommendations are based on the lowest energy consumption (Watt-min) for each scenario.\n"
-   "Energy consumption is calculated as the product of power draw and total time taken."
+   "Energy consumption is calculated as the product of power draw and total time taken.\n"
+   "Energy per token is the instantaneous power divided by token generation rate."
)

-axs[5, 0].axis('off')
-axs[5, 1].text(0.5, 0.5, summary_text, ha='center', va='center', fontsize=12, wrap=True)
-axs[5, 1].axis('off')
+axs[6, 0].axis('off')
+axs[6, 1].text(0.5, 0.5, summary_text, ha='center', va='center', fontsize=12, wrap=True)
+axs[6, 1].axis('off')

plt.tight_layout(rect=[0, 0, 1, 0.96])
plt.savefig('report.png')
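
A design note on the derive-if-missing fallback above: the logging scripts below guard against a zero token rate (`total_power / tokens_per_sec if tokens_per_sec else 0`), but this CSV-side fallback divides unconditionally, so a row with `tokens_per_sec == 0` would produce `inf` and distort the aggregated means. A guarded variant might look like this (a sketch, not part of the patch):

```python
import numpy as np

# Sketch: mask zero rates so the division yields NaN (ignored by mean())
# rather than inf; the same treatment would apply to training_stats.
if 'energy_per_token' not in inference_stats.columns:
    safe_rate = inference_stats['tokens_per_sec'].replace(0, np.nan)
    inference_stats['energy_per_token'] = (
        inference_stats['total_power_draw'] / safe_rate
    )
```
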
llm_inference.py: 16 changes (13 additions & 3 deletions)

@@ -62,7 +62,9 @@ def load_across_gpus(seq_length, batch_size, model_variant, max_iterations, call
# Get sample GPU metrics to dynamically generate headers
sample_metrics = get_gpu_metrics()[0]
gpu_headers = list(sample_metrics.keys())
-headers = ['timestamp', 'tokens_per_sec'] + gpu_headers + ['max_watt']
+headers = ['timestamp', 'tokens_per_sec'] + gpu_headers + [
+    'max_watt', 'total_power_draw', 'energy_per_token'
+]

for iteration in range(max_iterations):
model.eval()
@@ -91,10 +93,18 @@ def load_across_gpus(seq_length, batch_size, model_variant, max_iterations, call
from gpu_metrics_utils import collect_power_draw_all_gpus
total_power = collect_power_draw_all_gpus()
gpu_metrics = get_gpu_metrics()[0]
-data = [timestamp, tokens_per_sec] + list(gpu_metrics.values()) + [MAX_WATT, total_power]
+energy_per_token = total_power / tokens_per_sec if tokens_per_sec else 0
+data = [
+    timestamp,
+    tokens_per_sec,
+    *list(gpu_metrics.values()),
+    MAX_WATT,
+    total_power,
+    energy_per_token,
+]
if callback:
data = callback(data)
-log_statistics(LOG_FILE, headers + ['total_power_draw'], data)
+log_statistics(LOG_FILE, headers, data)
logger.info(f"Logged statistics: {data}")

shutdown_nvml()
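
With this change, `total_power_draw` and `energy_per_token` are declared in the header list up front rather than `total_power_draw` being appended at log time, so headers and data rows are built from the same sequence. Assuming `get_gpu_metrics()` reports the fields named in the README (an assumption; the actual keys come from that helper), a row of inference_stats.csv would be laid out as:

```
timestamp,tokens_per_sec,temperature,gpu_utilization,memory_utilization,max_watt,total_power_draw,energy_per_token
```
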
llm_training.py: 20 changes (17 additions & 3 deletions)

@@ -73,7 +73,9 @@ def load_across_gpus(gpu_ids, batch_size, seq_length, epochs, learning_rate, cal
# Get sample GPU metrics to dynamically generate headers
sample_metrics = get_gpu_metrics()[0]
gpu_headers = list(sample_metrics.keys())
-headers = ['timestamp', 'epoch', 'iteration', 'batch', 'loss', 'tokens_per_sec'] + gpu_headers + ['max_watt']
+headers = ['timestamp', 'epoch', 'iteration', 'batch', 'loss', 'tokens_per_sec'] + gpu_headers + [
+    'max_watt', 'total_power_draw', 'energy_per_token'
+]

model.train()
for epoch in range(epochs):
@@ -101,10 +103,22 @@ def load_across_gpus(gpu_ids, batch_size, seq_length, epochs, learning_rate, cal
from gpu_metrics_utils import collect_power_draw_all_gpus
total_power = collect_power_draw_all_gpus()
gpu_metrics = get_gpu_metrics()[0]
-data = [timestamp, epoch + 1, iteration, i // batch_size + 1, loss.item(), tokens_per_sec] + list(gpu_metrics.values()) + [MAX_WATT, total_power]
+energy_per_token = total_power / tokens_per_sec if tokens_per_sec else 0
+data = [
+    timestamp,
+    epoch + 1,
+    iteration,
+    i // batch_size + 1,
+    loss.item(),
+    tokens_per_sec,
+    *list(gpu_metrics.values()),
+    MAX_WATT,
+    total_power,
+    energy_per_token,
+]
if callback:
data = callback(data)
-log_statistics(log_file, headers + ['total_power_draw'], data)
+log_statistics(log_file, headers, data)
logger.info(f"Logged statistics: {data}")

shutdown_nvml()
recommend.py: 9 changes (9 additions & 0 deletions)

@@ -8,6 +8,15 @@
training_stats['timestamp'] = pd.to_datetime(training_stats['timestamp'])
inference_stats['timestamp'] = pd.to_datetime(inference_stats['timestamp'])

+if 'energy_per_token' not in inference_stats.columns:
+    inference_stats['energy_per_token'] = (
+        inference_stats['total_power_draw'] / inference_stats['tokens_per_sec']
+    )
+if 'energy_per_token' not in training_stats.columns:
+    training_stats['energy_per_token'] = (
+        training_stats['total_power_draw'] / training_stats['tokens_per_sec']
+    )

# Function to calculate summary statistics
def calculate_summary(data):
numeric_data = data.select_dtypes(include='number')
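
With the derived column in place, a recommendation by this metric reduces to a groupby. A minimal sketch (assuming recommend.py, like generate_report.py, compares wattage caps against mean energy per token; the truncated `calculate_summary` above may do this differently):

```python
# Sketch: wattage cap with the lowest mean energy per token (inference side)
best_watt = (
    inference_stats.groupby('max_watt')['energy_per_token']
    .mean()
    .idxmin()
)
print(f"Lowest energy per token at max_watt = {best_watt}W")
```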