heroku
diff --git a/‎MIA-schema.json
Lines changed: 394 additions & 0 deletions b/‎MIA-schema.json
Lines changed: 394 additions & 0 deletions
@@ -0,0 +1,394 @@
+{
+  "$schema": "http://json-schema.org/draft-07/hyper-schema#",
+  "title": "Heroku Managed Inference and Agent API",
+  "description": "API endpoints for generating conversational completions and vector embeddings using the Heroku Managed Inference and Agent add-on.",
+  "definitions": {
+    "chat_completions": {
+      "links": [
+        {
+          "rel": "chat_completions",
+          "href": "/v1/chat/completions",
+          "method": "POST",
+          "title": "Generate conversational completions",
+          "schema": {
+            "$ref": "#/definitions/chat_completions"
+          }
+        }
+      ],
+      "type": "object",
+      "properties": {
+        "model": {
+          "type": "string",
+          "description": "The model used for completion. Typically, you'll use your INFERENCE_MODEL_ID config variable for this value.",
+          "examples": [
+            "claude-3-5-sonnet"
+          ]
+        },
+        "messages": {
+          "type": "array",
+          "description": "An array of message objects representing the conversation history.",
+          "items": {
+            "type": "object",
+            "properties": {
+              "role": {
+                "type": "string",
+                "enum": [
+                  "user",
+                  "assistant",
+                  "system",
+                  "tool"
+                ],
+                "description": "The role of the message sender."
+              },
+              "content": {
+                "type": [
+                  "string",
+                  "array"
+                ],
+                "description": "The content of the message."
+              },
+              "refusal": {
+                "type": [
+                  "string",
+                  "null"
+                ],
+                "description": "A refusal message by the assistant."
+              },
+              "tool_calls": {
+                "type": "array",
+                "description": "Tool calls generated by the model.",
+                "items": {
+                  "type": "object",
+                  "properties": {
+                    "id": {
+                      "type": "string",
+                      "description": "The ID of the tool call."
+                    },
+                    "type": {
+                      "type": "string",
+                      "enum": [
+                        "function",
+                        "heroku_tool"
+                      ],
+                      "description": "The type of tool call."
+                    },
+                    "function": {
+                      "type": "object",
+                      "properties": {
+                        "name": {
+                          "type": "string",
+                          "description": "The name of the function to be called."
+                        },
+                        "arguments": {
+                          "type": "string",
+                          "description": "The arguments for the function call."
+                        }
+                      },
+                      "required": [
+                        "name",
+                        "arguments"
+                      ]
+                    }
+                  },
+                  "required": [
+                    "id",
+                    "type",
+                    "function"
+                  ]
+                }
+              },
+              "tool_call_id": {
+                "type": "string",
+                "description": "The ID of the tool call that this message is responding to."
+              }
+            },
+            "required": [
+              "role",
+              "content"
+            ]
+          }
+        },
+        "max_tokens": {
+          "type": "integer",
+          "description": "The maximum number of tokens the model is allowed to generate before stopping.",
+          "maximum": 4096,
+          "default": 4096,
+          "examples": [
+            100
+          ]
+        },
+        "stop": {
+          "type": "array",
+          "description": "A list of strings where the model will stop generating further tokens once any of the strings is encountered in the response.",
+          "items": {
+            "type": "string"
+          },
+          "examples": [
+            [
+              "foo"
+            ]
+          ]
+        },
+        "stream": {
+          "type": "boolean",
+          "description": "Whether to stream responses incrementally via server-sent events.",
+          "default": false,
+          "examples": [
+            true
+          ]
+        },
+        "system": {
+          "type": "string",
+          "description": "An optional system prompt used to provide additional context or set behavior for the model.",
+          "examples": [
+            "Be concise and avoid waffling."
+          ]
+        },
+        "temperature": {
+          "type": "number",
+          "description": "Controls the randomness of the response. Values closer to 0 make the response more focused, while values closer to 1.0 encourage more diverse responses.",
+          "minimum": 0.0,
+          "maximum": 1.0,
+          "default": 1.0,
+          "examples": [
+            0.2
+          ]
+        },
+        "tool_choice": {
+          "type": [
+            "string",
+            "object"
+          ],
+          "description": "Specifies how the model should use the provided tools.",
+          "oneOf": [
+            {
+              "type": "string",
+              "enum": [
+                "none",
+                "auto",
+                "required"
+              ]
+            },
+            {
+              "type": "object",
+              "properties": {
+                "type": {
+                  "type": "string",
+                  "enum": [
+                    "function",
+                    "heroku_tool"
+                  ]
+                },
+                "function": {
+                  "type": "object",
+                  "properties": {
+                    "name": {
+                      "type": "string",
+                      "description": "The name of the function to be called."
+                    }
+                  },
+                  "required": [
+                    "name"
+                  ]
+                }
+              },
+              "required": [
+                "type",
+                "function"
+              ]
+            }
+          ],
+          "default": "auto",
+          "examples": [
+            {
+              "type": "function",
+              "function": {
+                "name": "get_current_weather"
+              }
+            }
+          ]
+        },
+        "tools": {
+          "type": "array",
+          "description": "The tools that the model may call.",
+          "items": {
+            "type": "object",
+            "properties": {
+              "type": {
+                "type": "string",
+                "enum": [
+                  "function",
+                  "heroku_tool"
+                ]
+              },
+              "function": {
+                "type": "object",
+                "properties": {
+                  "name": {
+                    "type": "string",
+                    "description": "The name of the function to be called."
+                  },
+                  "description": {
+                    "type": "string",
+                    "description": "A description of what the function does."
+                  },
+                  "parameters": {
+                    "type": "object",
+                    "description": "The parameters the function accepts, described as a JSON Schema object."
+                  }
+                },
+                "required": [
+                  "name",
+                  "description",
+                  "parameters"
+                ]
+              }
+            },
+            "required": [
+              "type",
+              "function"
+            ]
+          },
+          "examples": [
+            [
+              {
+                "type": "function",
+                "function": {
+                  "name": "get_current_weather",
+                  "description": "Get the current weather in a given location",
+                  "parameters": {
+                    "type": "object",
+                    "properties": {
+                      "location": {
+                        "type": "string",
+                        "description": "The city and state, e.g. Portland, OR"
+                      }
+                    },
+                    "required": [
+                      "location"
+                    ]
+                  }
+                }
+              }
+            ]
+          ]
+        },
+        "top_p": {
+          "type": "number",
+          "description": "Specifies the proportion of tokens to consider when generating the next token, in terms of cumulative probability.",
+          "minimum": 0.0,
+          "maximum": 1.0,
+          "default": 0.999,
+          "examples": [
+            0.95
+          ]
+        }
+      },
+      "required": [
+        "model",
+        "messages"
+      ]
+    },
+    "embeddings": {
+      "links": [
+        {
+          "rel": "embeddings",
+          "href": "/v1/embeddings",
+          "method": "POST",
+          "title": "Generate vector embeddings",
+          "schema": {
+            "$ref": "#/definitions/embeddings"
+          }
+        }
+      ],
+      "type": "object",
+      "properties": {
+        "model": {
+          "type": "string",
+          "description": "ID of the embedding model to use.",
+          "examples": [
+            "cohere-embed-multilingual"
+          ]
+        },
+        "input": {
+          "type": "array",
+          "description": "An array of strings (up to 96) for the model to embed. Recommended length is less than 512 tokens per string.",
+          "items": {
+            "type": "string"
+          },
+          "examples": [
+            [
+              "example string 1",
+              "example string 2"
+            ]
+          ]
+        },
+        "input_type": {
+          "type": "string",
+          "description": "Specifies the type of input passed to the model. Prepends special tokens to the input.",
+          "enum": [
+            "search_document",
+            "search_query",
+            "classification",
+            "clustering"
+          ],
+          "default": "search_document",
+          "examples": [
+            "search_query"
+          ]
+        },
+        "encoding_format": {
+          "type": "string",
+          "description": "Determines the encoding format of the output.",
+          "enum": [
+            "raw",
+            "base64"
+          ],
+          "default": "raw",
+          "examples": [
+            "base64"
+          ]
+        },
+        "embedding_type": {
+          "type": "string",
+          "description": "Specifies the type of embeddings to return.",
+          "enum": [
+            "float",
+            "int8",
+            "uint8",
+            "binary",
+            "ubinary"
+          ],
+          "default": "float",
+          "examples": [
+            "int8"
+          ]
+        }
+      },
+      "required": [
+        "model",
+        "input"
+      ]
+    }
+  },
+  "links": [
+    {
+      "rel": "chat_completions",
+      "href": "/v1/chat/completions",
+      "method": "POST",
+      "title": "Generate conversational completions",
+      "schema": {
+        "$ref": "#/definitions/chat_completions"
+      }
+    },
+    {
+      "rel": "embeddings",
+      "href": "/v1/embeddings",
+      "method": "POST",
+      "title": "Generate vector embeddings",
+      "schema": {
+        "$ref": "#/definitions/embeddings"
+      }
+    }
+  ]
+}