Skip to content

Commit d927d5e

Browse files
gustavocidornelas
authored and whoseoyster committed
Add costColumnName to LLM data schemas and upload cost estimate for OpenAI LLM monitor
1 parent 8d1659a commit d927d5e

File tree

4 files changed

+149
-3
lines changed

4 files changed

+149
-3
lines changed

CHANGELOG.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,14 @@ All notable changes to this project will be documented in this file.
55
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
66
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
77

8+
## Unreleased
9+
10+
### Added
11+
* Added `costColumnName` as an optional field in the config for LLM data.
12+
13+
### Changed
14+
* `llm_monitor` for OpenAI models now records the `cost` estimate and uploads it.
15+
816
## [0.1.0a20]
917

1018
### Added

openlayer/llm_monitors.py

Lines changed: 95 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,66 @@ class OpenAIMonitor:
8686
8787
"""
8888

89+
# Last update: 2024-01-05
90+
COST_PER_TOKEN = {
91+
"babbage-002": {
92+
"input": 0.0004e-3,
93+
"output": 0.0004e-3,
94+
},
95+
"davinci-002": {
96+
"input": 0.002e-3,
97+
"output": 0.002e-3,
98+
},
99+
"gpt-3.5-turbo": {
100+
"input": 0.003e-3,
101+
"output": 0.006e-3,
102+
},
103+
"gpt-3.5-turbo-0301": {
104+
"input": 0.0015e-3,
105+
"output": 0.002e-3,
106+
},
107+
"gpt-3.5-turbo-0613": {
108+
"input": 0.0015e-3,
109+
"output": 0.002e-3,
110+
},
111+
"gpt-3.5-turbo-1106": {
112+
"input": 0.001e-3,
113+
"output": 0.002e-3,
114+
},
115+
"gpt-3.5-turbo-16k-0613": {
116+
"input": 0.003e-3,
117+
"output": 0.004e-3,
118+
},
119+
"gpt-3.5-turbo-instruct": {
120+
"input": 0.0015e-3,
121+
"output": 0.002e-3,
122+
},
123+
"gpt-4": {
124+
"input": 0.03e-3,
125+
"output": 0.06e-3,
126+
},
127+
"gpt-4-0314": {
128+
"input": 0.03e-3,
129+
"output": 0.06e-3,
130+
},
131+
"gpt-4-1106-preview": {
132+
"input": 0.01e-3,
133+
"output": 0.03e-3,
134+
},
135+
"gpt-4-1106-vision-preview": {
136+
"input": 0.01e-3,
137+
"output": 0.03e-3,
138+
},
139+
"gpt-4-32k": {
140+
"input": 0.06e-3,
141+
"output": 0.12e-3,
142+
},
143+
"gpt-4-32k-0314": {
144+
"input": 0.06e-3,
145+
"output": 0.12e-3,
146+
},
147+
}
148+
89149
def __init__(
90150
self,
91151
publish: bool = False,
@@ -207,15 +267,23 @@ def modified_create_chat_completion(*args, **kwargs) -> str:
207267
prompt, input_data = self.format_input(kwargs["messages"])
208268
output_data = response.choices[0].message.content.strip()
209269
num_of_tokens = response.usage.total_tokens
270+
cost = self.get_cost_estimate(
271+
model=kwargs.get("model"),
272+
num_input_tokens=response.usage.prompt_tokens,
273+
num_output_tokens=response.usage.completion_tokens,
274+
)
275+
210276
config = self.data_config.copy()
211277
config["prompt"] = prompt
212278
config.update({"inputVariableNames": list(input_data.keys())})
279+
config["costColumnName"] = "cost"
213280

214281
self._append_row_to_df(
215282
input_data=input_data,
216283
output_data=output_data,
217284
num_of_tokens=num_of_tokens,
218285
latency=latency,
286+
cost=cost,
219287
)
220288

221289
self._handle_data_publishing(config=config)
@@ -243,15 +311,24 @@ def modified_create_completion(*args, **kwargs):
243311
for input_data, choices in zip(prompts, choices_splits):
244312
output_data = choices[0].text.strip()
245313
num_of_tokens = int(response.usage.total_tokens / len(prompts))
314+
cost = self.get_cost_estimate(
315+
model=kwargs.get("model"),
316+
num_input_tokens=response.usage.prompt_tokens,
317+
num_output_tokens=response.usage.completion_tokens,
318+
)
246319

247320
self._append_row_to_df(
248321
input_data={"message": input_data},
249322
output_data=output_data,
250323
num_of_tokens=num_of_tokens,
251324
latency=latency,
325+
cost=cost,
252326
)
253327

254-
self._handle_data_publishing()
328+
config = self.data_config.copy()
329+
config["costColumnName"] = "cost"
330+
331+
self._handle_data_publishing(config=config)
255332
# pylint: disable=broad-except
256333
except Exception as e:
257334
logger.error("Failed to monitor completion request. %s", e)
@@ -323,12 +400,25 @@ def _split_list(lst: List, n_parts: int) -> List[List]:
323400
start = end
324401
return result
325402

403+
def get_cost_estimate(
404+
self, num_input_tokens: int, num_output_tokens: int, model: str
405+
) -> float:
406+
"""Returns the cost estimate for a given model and number of tokens."""
407+
if model not in self.COST_PER_TOKEN:
408+
return None
409+
cost_per_token = self.COST_PER_TOKEN[model]
410+
return (
411+
cost_per_token["input"] * num_input_tokens
412+
+ cost_per_token["output"] * num_output_tokens
413+
)
414+
326415
def _append_row_to_df(
327416
self,
328417
input_data: Dict[str, str],
329418
output_data: str,
330419
num_of_tokens: int,
331420
latency: float,
421+
cost: float,
332422
) -> None:
333423
"""Appends a row with input/output, number of tokens, and latency to the
334424
df."""
@@ -340,6 +430,7 @@ def _append_row_to_df(
340430
"output": output_data,
341431
"tokens": num_of_tokens,
342432
"latency": latency,
433+
"cost": cost,
343434
},
344435
}
345436
]
@@ -352,7 +443,9 @@ def _append_row_to_df(
352443
# Perform casting
353444
input_columns = [col for col in self.df.columns if col.startswith("message")]
354445
casting_dict = {col: object for col in input_columns}
355-
casting_dict.update({"output": object, "tokens": int, "latency": float})
446+
casting_dict.update(
447+
{"output": object, "tokens": int, "latency": float, "cost": float}
448+
)
356449
self.df = self.df.astype(casting_dict)
357450

358451
def _handle_data_publishing(self, config: Optional[Dict[str, any]] = None) -> None:

openlayer/model_runners/ll_model_runners.py

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -441,12 +441,36 @@ def _get_cost_estimate(self, response: Dict[str, Any]) -> float:
441441
class OpenAIChatCompletionRunner(LLModelRunner):
442442
"""Wraps OpenAI's chat completion model."""
443443

444-
# Last update: 2023-12-19
444+
# Last update: 2024-01-05
445445
COST_PER_TOKEN = {
446+
"babbage-002": {
447+
"input": 0.0004e-3,
448+
"output": 0.0004e-3,
449+
},
450+
"davinci-002": {
451+
"input": 0.002e-3,
452+
"output": 0.002e-3,
453+
},
454+
"gpt-3.5-turbo": {
455+
"input": 0.003e-3,
456+
"output": 0.006e-3,
457+
},
458+
"gpt-3.5-turbo-0301": {
459+
"input": 0.0015e-3,
460+
"output": 0.002e-3,
461+
},
462+
"gpt-3.5-turbo-0613": {
463+
"input": 0.0015e-3,
464+
"output": 0.002e-3,
465+
},
446466
"gpt-3.5-turbo-1106": {
447467
"input": 0.001e-3,
448468
"output": 0.002e-3,
449469
},
470+
"gpt-3.5-turbo-16k-0613": {
471+
"input": 0.003e-3,
472+
"output": 0.004e-3,
473+
},
450474
"gpt-3.5-turbo-instruct": {
451475
"input": 0.0015e-3,
452476
"output": 0.002e-3,
@@ -455,10 +479,26 @@ class OpenAIChatCompletionRunner(LLModelRunner):
455479
"input": 0.03e-3,
456480
"output": 0.06e-3,
457481
},
482+
"gpt-4-0314": {
483+
"input": 0.03e-3,
484+
"output": 0.06e-3,
485+
},
486+
"gpt-4-1106-preview": {
487+
"input": 0.01e-3,
488+
"output": 0.03e-3,
489+
},
490+
"gpt-4-1106-vision-preview": {
491+
"input": 0.01e-3,
492+
"output": 0.03e-3,
493+
},
458494
"gpt-4-32k": {
459495
"input": 0.06e-3,
460496
"output": 0.12e-3,
461497
},
498+
"gpt-4-32k-0314": {
499+
"input": 0.06e-3,
500+
"output": 0.12e-3,
501+
},
462502
}
463503

464504
def __init__(

openlayer/schemas/dataset_schemas.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,11 @@ class LLMOutputSchema(ma.Schema):
104104
allow_none=True,
105105
load_default=None,
106106
)
107+
costColumnName = ma.fields.Str(
108+
validate=constants.COLUMN_NAME_VALIDATION_LIST,
109+
allow_none=True,
110+
load_default=None,
111+
)
107112
numOfTokenColumnName = ma.fields.Str(
108113
validate=constants.COLUMN_NAME_VALIDATION_LIST,
109114
allow_none=True,

0 commit comments

Comments (0)