115 changes: 57 additions & 58 deletions examples/gradio/gradio_chat.py
@@ -15,25 +15,25 @@
}
""".strip()

def chat_with_model(message, history, model_choice, instructions, effort, use_functions,
function_name, function_description, function_parameters,
use_browser_search, temperature, max_output_tokens, debug_mode):

if not message.strip():
return history, ""

# Append user message and empty assistant placeholder (idiomatic Gradio pattern)
history = history + [[message, ""]]

# Build messages list from history (excluding the empty assistant placeholder)
messages = []

# Convert history to messages format (excluding the last empty assistant message)
for user_msg, assistant_msg in history[:-1]:
if user_msg:
messages.append({
"type": "message",
"role": "user",
"role": "user",
"content": [{"type": "input_text", "text": user_msg}]
})
if assistant_msg:
@@ -42,14 +42,14 @@ def chat_with_model(message, history, model_choice, instructions, effort, use_fu
"role": "assistant",
"content": [{"type": "output_text", "text": assistant_msg}]
})

# Add current user message
messages.append({
"type": "message",
"role": "user",
"content": [{"type": "input_text", "text": message}]
})

# Prepare tools
tools = []
if use_functions:
@@ -62,18 +62,18 @@ def chat_with_model(message, history, model_choice, instructions, effort, use_fu
})
except json.JSONDecodeError:
pass

if use_browser_search:
tools.append({"type": "browser_search"})

# Get URL based on model (matching streamlit logic)
options = ["large", "small"]
URL = ("http://localhost:8081/v1/responses" if model_choice == options[1]
url = ("http://localhost:8081/v1/responses" if model_choice == options[1]
else "http://localhost:8000/v1/responses")

try:
response = requests.post(
-            URL,
+            url,
json={
"input": messages,
"stream": True,
@@ -86,32 +86,31 @@ def chat_with_model(message, history, model_choice, instructions, effort, use_fu
},
stream=True,
)

full_content = ""
text_delta = ""
current_output_index = 0

in_reasoning = False

for line in response.iter_lines(decode_unicode=True):
if not line or not line.startswith("data:"):
continue
data_str = line[len("data:"):].strip()
if not data_str:
continue

try:
data = json.loads(data_str)
except Exception:
continue

event_type = data.get("type", "")
            output_index = data.get("output_index", 0)

            if event_type == "response.output_item.added":
current_output_index = output_index

output_type = data.get("item", {}).get("type", "message")
text_delta = ""


if output_type == "reasoning":
if not in_reasoning:
full_content += "🤔 **Thinking...**\n"
@@ -120,56 +119,56 @@ def chat_with_model(message, history, model_choice, instructions, effort, use_fu
if in_reasoning:
full_content += "\n\n"
in_reasoning = False

elif event_type == "response.reasoning_text.delta":
delta = data.get("delta", "")
full_content += delta

# Update last assistant message (idiomatic Gradio pattern)
history[-1][1] = full_content
yield history, ""

elif event_type == "response.output_text.delta":
delta = data.get("delta", "")
                full_content += delta

                # Update last assistant message (idiomatic Gradio pattern)
history[-1][1] = full_content
yield history, ""

elif event_type == "response.output_item.done":
item = data.get("item", {})
if item.get("type") == "function_call":
function_call_text = f"\n\n🔨 Called `{item.get('name')}`\n**Arguments**\n```json\n{item.get('arguments', '')}\n```"
full_content += function_call_text

# Update last assistant message (idiomatic Gradio pattern)
history[-1][1] = full_content
yield history, ""

elif item.get("type") == "web_search_call":
web_search_text = f"\n\n🌐 **Web Search**\n```json\n{json.dumps(item.get('action', {}), indent=2)}\n```\n✅ Done"
full_content += web_search_text

# Update last assistant message (idiomatic Gradio pattern)
history[-1][1] = full_content
yield history, ""

elif event_type == "response.completed":
response_data = data.get("response", {})
if debug_mode:
debug_info = response_data.get("metadata", {}).get("__debug", "")
if debug_info:
full_content += f"\n\n**Debug**\n```\n{debug_info}\n```"

# Update last assistant message (idiomatic Gradio pattern)
history[-1][1] = full_content
yield history, ""
break

# Return final history and empty string to clear textbox
return history, ""

except Exception as e:
error_message = f"❌ Error: {str(e)}"
history[-1][1] = error_message
@@ -179,69 +178,69 @@ def chat_with_model(message, history, model_choice, instructions, effort, use_fu
# Create the Gradio interface
with gr.Blocks(title="💬 Chatbot") as demo:
gr.Markdown("# 💬 Chatbot")

with gr.Row():
with gr.Column(scale=3):
chatbot = gr.Chatbot(height=500)

with gr.Row():
msg = gr.Textbox(placeholder="Type a message...", scale=4, show_label=False)
send_btn = gr.Button("Send", scale=1)

clear_btn = gr.Button("Clear Chat")

with gr.Column(scale=1):
model_choice = gr.Radio(["large", "small"], value="small", label="Model")

instructions = gr.Textbox(
label="Instructions",
label="Instructions",
value="You are a helpful assistant that can answer questions and help with tasks.",
lines=3
)

effort = gr.Radio(["low", "medium", "high"], value="medium", label="Reasoning effort")

gr.Markdown("#### Functions")
use_functions = gr.Checkbox(label="Use functions", value=False)

with gr.Column(visible=False) as function_group:
function_name = gr.Textbox(label="Function name", value="get_weather")
function_description = gr.Textbox(
label="Function description",
label="Function description",
value="Get the weather for a given city"
)
function_parameters = gr.Textbox(
label="Function parameters",
label="Function parameters",
value=DEFAULT_FUNCTION_PROPERTIES,
lines=6
)

# Conditional browser search (matching Streamlit logic)
# In Streamlit: if "show_browser" in st.query_params:
# For Gradio, we'll always show it (simplified)
gr.Markdown("#### Built-in Tools")
gr.Markdown("#### Built-in Tools")
use_browser_search = gr.Checkbox(label="Use browser search", value=False)

temperature = gr.Slider(0.0, 1.0, value=1.0, step=0.01, label="Temperature")
max_output_tokens = gr.Slider(1000, 20000, value=1024, step=100, label="Max output tokens")

debug_mode = gr.Checkbox(label="Debug mode", value=False)

# Event handlers
def toggle_function_group(use_funcs):
return gr.update(visible=use_funcs)

use_functions.change(toggle_function_group, use_functions, function_group)

# Chat functionality
    inputs = [msg, chatbot, model_choice, instructions, effort, use_functions,
function_name, function_description, function_parameters,
use_browser_search, temperature, max_output_tokens, debug_mode]

msg.submit(chat_with_model, inputs, [chatbot, msg])
send_btn.click(chat_with_model, inputs, [chatbot, msg])
clear_btn.click(lambda: [], outputs=chatbot)


if __name__ == "__main__":
    demo.launch()
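A note on the streaming handler above: the `data:`-prefixed lines it parses follow the Server-Sent Events (SSE) convention used by the Responses API endpoint. Below is a minimal sketch of that parsing loop, factored out so it can be exercised without a running server — the `iter_sse_events` helper and the sample payloads are illustrative, not part of this PR:

```python
import json

def iter_sse_events(lines):
    """Yield decoded JSON payloads from an iterable of SSE lines."""
    for line in lines:
        # Skip keep-alives and anything that is not a data field.
        if not line or not line.startswith("data:"):
            continue
        data_str = line[len("data:"):].strip()
        if not data_str:
            continue
        try:
            yield json.loads(data_str)
        except json.JSONDecodeError:
            # Tolerate malformed chunks, as chat_with_model does.
            continue

# Accumulate text deltas the same way chat_with_model builds full_content.
sample = [
    'data: {"type": "response.output_text.delta", "delta": "Hel"}',
    'data: {"type": "response.output_text.delta", "delta": "lo"}',
    'data: {"type": "response.completed", "response": {}}',
]
full_content = ""
for event in iter_sse_events(sample):
    if event.get("type") == "response.output_text.delta":
        full_content += event.get("delta", "")
assert full_content == "Hello"
```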
5 changes: 2 additions & 3 deletions gpt_oss/evals/aime_eval.py
@@ -44,9 +44,9 @@ def __init__(
num_examples: int | None = None, # restrict to a subset of the data for debugging
n_threads: int = 1,
):
path1 = f"https://huggingface.co/datasets/opencompass/AIME2025/raw/main/aime2025-I.jsonl"
path1 = "https://huggingface.co/datasets/opencompass/AIME2025/raw/main/aime2025-I.jsonl"
df1 = pandas.read_json(path1, lines=True)
path2 = f"https://huggingface.co/datasets/opencompass/AIME2025/raw/main/aime2025-II.jsonl"
path2 = "https://huggingface.co/datasets/opencompass/AIME2025/raw/main/aime2025-II.jsonl"
df2 = pandas.read_json(path2, lines=True)
examples = [row.to_dict() for _, row in df1.iterrows()] + [row.to_dict() for _, row in df2.iterrows()]
examples = [{
@@ -94,4 +94,3 @@ def fn(row: dict):

results = report.map_with_progress(fn, self.examples, num_threads=self.n_threads)
return report.aggregate_results(results)

8 changes: 4 additions & 4 deletions gpt_oss/responses_api/inference/triton.py
@@ -1,12 +1,12 @@
-import datetime

import os
from typing import Callable

os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
import torch
import torch.distributed as dist

-from gpt_oss.triton.model import Cache, ModelConfig, Transformer
+from gpt_oss.triton.model import Cache, Transformer

DEFAULT_TEMPERATURE = 0.0
CONTEXT = 16_384
@@ -73,7 +73,7 @@ def infer_next_token(
tokens_so_far = lcp(tokens_so_far, tokens)
for cache in caches:
cache.truncate(len(tokens_so_far))
-        all_tokens = tokens # for pdb

tokens = tokens[len(tokens_so_far) :]

if len(tokens) > 1:
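For context on the hunk above: `lcp` computes the longest common prefix of the previous and current token sequences, so the KV caches can be truncated to the shared prefix and only the new suffix re-processed. A minimal sketch of that logic, assuming `lcp` is a plain element-wise comparison (the repo's actual helper may differ):

```python
def lcp(a: list[int], b: list[int]) -> list[int]:
    """Return the longest common prefix of two token lists."""
    prefix = []
    for x, y in zip(a, b):
        if x != y:
            break
        prefix.append(x)
    return prefix

# Only tokens after the shared prefix need a forward pass; the KV
# caches are truncated to the prefix length first (cache.truncate above).
prev = [1, 2, 3, 4]
curr = [1, 2, 3, 5, 6]
shared = lcp(prev, curr)
assert shared == [1, 2, 3]
new_tokens = curr[len(shared):]  # -> [5, 6], the only tokens to process
```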