@@ -86,6 +86,66 @@ class OpenAIMonitor:
     """

+    # Last update: 2024-01-05
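+    # Costs are expressed in USD per token (OpenAI quotes prices per 1K tokens,
+    # hence the 1e-3 factor).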
+    COST_PER_TOKEN = {
+        "babbage-002": {
+            "input": 0.0004e-3,
+            "output": 0.0004e-3,
+        },
+        "davinci-002": {
+            "input": 0.002e-3,
+            "output": 0.002e-3,
+        },
+        "gpt-3.5-turbo": {
+            "input": 0.003e-3,
+            "output": 0.006e-3,
+        },
+        "gpt-3.5-turbo-0301": {
+            "input": 0.0015e-3,
+            "output": 0.002e-3,
+        },
+        "gpt-3.5-turbo-0613": {
+            "input": 0.0015e-3,
+            "output": 0.002e-3,
+        },
+        "gpt-3.5-turbo-1106": {
+            "input": 0.001e-3,
+            "output": 0.002e-3,
+        },
+        "gpt-3.5-turbo-16k-0613": {
+            "input": 0.003e-3,
+            "output": 0.004e-3,
+        },
+        "gpt-3.5-turbo-instruct": {
+            "input": 0.0015e-3,
+            "output": 0.002e-3,
+        },
+        "gpt-4": {
+            "input": 0.03e-3,
+            "output": 0.06e-3,
+        },
+        "gpt-4-0314": {
+            "input": 0.03e-3,
+            "output": 0.06e-3,
+        },
+        "gpt-4-1106-preview": {
+            "input": 0.01e-3,
+            "output": 0.03e-3,
+        },
+        "gpt-4-1106-vision-preview": {
+            "input": 0.01e-3,
+            "output": 0.03e-3,
+        },
+        "gpt-4-32k": {
+            "input": 0.06e-3,
+            "output": 0.12e-3,
+        },
+        "gpt-4-32k-0314": {
+            "input": 0.06e-3,
+            "output": 0.12e-3,
+        },
+    }
+
     def __init__(
         self,
         publish: bool = False,
@@ -207,15 +267,23 @@ def modified_create_chat_completion(*args, **kwargs) -> str:
             prompt, input_data = self.format_input(kwargs["messages"])
             output_data = response.choices[0].message.content.strip()
             num_of_tokens = response.usage.total_tokens
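+            # Estimate the request cost from the token usage reported by the API.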
+            cost = self.get_cost_estimate(
+                model=kwargs.get("model"),
+                num_input_tokens=response.usage.prompt_tokens,
+                num_output_tokens=response.usage.completion_tokens,
+            )
+
             config = self.data_config.copy()
             config["prompt"] = prompt
             config.update({"inputVariableNames": list(input_data.keys())})
+            config["costColumnName"] = "cost"
 
             self._append_row_to_df(
                 input_data=input_data,
                 output_data=output_data,
                 num_of_tokens=num_of_tokens,
                 latency=latency,
+                cost=cost,
             )
 
             self._handle_data_publishing(config=config)
@@ -243,15 +311,24 @@ def modified_create_completion(*args, **kwargs):
             for input_data, choices in zip(prompts, choices_splits):
                 output_data = choices[0].text.strip()
                 num_of_tokens = int(response.usage.total_tokens / len(prompts))
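+                # The usage reported by the API covers the whole request, so split
+                # it evenly across prompts to keep the per-row cost estimate from
+                # being over-counted.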
+                cost = self.get_cost_estimate(
+                    model=kwargs.get("model"),
+                    num_input_tokens=int(response.usage.prompt_tokens / len(prompts)),
+                    num_output_tokens=int(response.usage.completion_tokens / len(prompts)),
+                )
 
                 self._append_row_to_df(
                     input_data={"message": input_data},
                     output_data=output_data,
                     num_of_tokens=num_of_tokens,
                     latency=latency,
+                    cost=cost,
                 )
 
-            self._handle_data_publishing()
+            config = self.data_config.copy()
+            config["costColumnName"] = "cost"
+
+            self._handle_data_publishing(config=config)
         # pylint: disable=broad-except
         except Exception as e:
             logger.error("Failed to monitor completion request. %s", e)
@@ -323,12 +400,25 @@ def _split_list(lst: List, n_parts: int) -> List[List]:
             start = end
         return result
 
+    def get_cost_estimate(
+        self, num_input_tokens: int, num_output_tokens: int, model: str
+    ) -> Optional[float]:
+        """Returns the cost estimate in USD, or None for unknown models."""
+        if model not in self.COST_PER_TOKEN:
+            return None
+        cost_per_token = self.COST_PER_TOKEN[model]
+        return (
+            cost_per_token["input"] * num_input_tokens
+            + cost_per_token["output"] * num_output_tokens
+        )
+
     def _append_row_to_df(
         self,
         input_data: Dict[str, str],
         output_data: str,
         num_of_tokens: int,
         latency: float,
+        cost: Optional[float],
     ) -> None:
-        """Appends a row with input/output, number of tokens, and latency to the
-        df."""
+        """Appends a row with input/output, number of tokens, latency, and cost
+        to the df."""
@@ -340,6 +430,7 @@ def _append_row_to_df(
                     "output": output_data,
                     "tokens": num_of_tokens,
                     "latency": latency,
+                    "cost": cost,
                 },
             }
         ]
@@ -352,7 +443,9 @@ def _append_row_to_df(
         # Perform casting
         input_columns = [col for col in self.df.columns if col.startswith("message")]
         casting_dict = {col: object for col in input_columns}
-        casting_dict.update({"output": object, "tokens": int, "latency": float})
+        casting_dict.update(
+            {"output": object, "tokens": int, "latency": float, "cost": float}
+        )
         self.df = self.df.astype(casting_dict)
 
     def _handle_data_publishing(self, config: Optional[Dict[str, any]] = None) -> None: