@@ -86,6 +86,66 @@ class OpenAIMonitor:
     """

+    # Last update: 2024-01-05
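+    # Costs are expressed in USD per token (OpenAI quotes prices per 1K tokens,
+    # hence the 1e-3 factor).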
+    COST_PER_TOKEN = {
+        "babbage-002": {
+            "input": 0.0004e-3,
+            "output": 0.0004e-3,
+        },
+        "davinci-002": {
+            "input": 0.002e-3,
+            "output": 0.002e-3,
+        },
+        "gpt-3.5-turbo": {
+            "input": 0.003e-3,
+            "output": 0.006e-3,
+        },
+        "gpt-3.5-turbo-0301": {
+            "input": 0.0015e-3,
+            "output": 0.002e-3,
+        },
+        "gpt-3.5-turbo-0613": {
+            "input": 0.0015e-3,
+            "output": 0.002e-3,
+        },
+        "gpt-3.5-turbo-1106": {
+            "input": 0.001e-3,
+            "output": 0.002e-3,
+        },
+        "gpt-3.5-turbo-16k-0613": {
+            "input": 0.003e-3,
+            "output": 0.004e-3,
+        },
+        "gpt-3.5-turbo-instruct": {
+            "input": 0.0015e-3,
+            "output": 0.002e-3,
+        },
+        "gpt-4": {
+            "input": 0.03e-3,
+            "output": 0.06e-3,
+        },
+        "gpt-4-0314": {
+            "input": 0.03e-3,
+            "output": 0.06e-3,
+        },
+        "gpt-4-1106-preview": {
+            "input": 0.01e-3,
+            "output": 0.03e-3,
+        },
+        "gpt-4-1106-vision-preview": {
+            "input": 0.01e-3,
+            "output": 0.03e-3,
+        },
+        "gpt-4-32k": {
+            "input": 0.06e-3,
+            "output": 0.12e-3,
+        },
+        "gpt-4-32k-0314": {
+            "input": 0.06e-3,
+            "output": 0.12e-3,
+        },
+    }
+
     def __init__(
         self,
         publish: bool = False,
@@ -207,15 +267,23 @@ def modified_create_chat_completion(*args, **kwargs) -> str:
             prompt, input_data = self.format_input(kwargs["messages"])
             output_data = response.choices[0].message.content.strip()
             num_of_tokens = response.usage.total_tokens
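+            # Estimate the request cost from the token usage reported by the API.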
+            cost = self.get_cost_estimate(
+                model=kwargs.get("model"),
+                num_input_tokens=response.usage.prompt_tokens,
+                num_output_tokens=response.usage.completion_tokens,
+            )
+
             config = self.data_config.copy()
             config["prompt"] = prompt
             config.update({"inputVariableNames": list(input_data.keys())})
+            config["costColumnName"] = "cost"
 
             self._append_row_to_df(
                 input_data=input_data,
                 output_data=output_data,
                 num_of_tokens=num_of_tokens,
                 latency=latency,
+                cost=cost,
             )
 
             self._handle_data_publishing(config=config)
@@ -243,15 +311,24 @@ def modified_create_completion(*args, **kwargs):
             for input_data, choices in zip(prompts, choices_splits):
                 output_data = choices[0].text.strip()
                 num_of_tokens = int(response.usage.total_tokens / len(prompts))
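+                # The usage reported by the API covers the whole request, so split
+                # it evenly across prompts to keep the per-row cost estimate from
+                # being over-counted.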
+                cost = self.get_cost_estimate(
+                    model=kwargs.get("model"),
+                    num_input_tokens=int(response.usage.prompt_tokens / len(prompts)),
+                    num_output_tokens=int(response.usage.completion_tokens / len(prompts)),
+                )
 
                 self._append_row_to_df(
                     input_data={"message": input_data},
                     output_data=output_data,
                     num_of_tokens=num_of_tokens,
                     latency=latency,
+                    cost=cost,
                 )
 
-            self._handle_data_publishing()
+            config = self.data_config.copy()
+            config["costColumnName"] = "cost"
+
+            self._handle_data_publishing(config=config)
         # pylint: disable=broad-except
         except Exception as e:
             logger.error("Failed to monitor completion request. %s", e)
@@ -323,12 +400,25 @@ def _split_list(lst: List, n_parts: int) -> List[List]:
             start = end
         return result
 
+    def get_cost_estimate(
+        self, num_input_tokens: int, num_output_tokens: int, model: str
+    ) -> Optional[float]:
+        """Returns the cost estimate in USD, or None for unknown models."""
+        if model not in self.COST_PER_TOKEN:
+            return None
+        cost_per_token = self.COST_PER_TOKEN[model]
+        return (
+            cost_per_token["input"] * num_input_tokens
+            + cost_per_token["output"] * num_output_tokens
+        )
+
     def _append_row_to_df(
         self,
         input_data: Dict[str, str],
         output_data: str,
         num_of_tokens: int,
         latency: float,
+        cost: Optional[float],
     ) -> None:
-        """Appends a row with input/output, number of tokens, and latency to the
-        df."""
+        """Appends a row with input/output, number of tokens, latency, and cost
+        to the df."""
@@ -340,6 +430,7 @@ def _append_row_to_df(
                     "output": output_data,
                     "tokens": num_of_tokens,
                     "latency": latency,
+                    "cost": cost,
                 },
             }
         ]
@@ -352,7 +443,9 @@ def _append_row_to_df(
         # Perform casting
         input_columns = [col for col in self.df.columns if col.startswith("message")]
         casting_dict = {col: object for col in input_columns}
-        casting_dict.update({"output": object, "tokens": int, "latency": float})
+        casting_dict.update(
+            {"output": object, "tokens": int, "latency": float, "cost": float}
+        )
         self.df = self.df.astype(casting_dict)
 
     def _handle_data_publishing(self, config: Optional[Dict[str, any]] = None) -> None: