[Fix] Update TaskHandler.run_step to work with the new continue_conversation_with_tool_results interface. (#39)

nerdai · web-flow · commit b96bae4ab5dd · 2025-07-04T23:58:40.000-04:00
* update run step

* changelog

* better cleanup
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -10,7 +10,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/).
 
 ### Changed
 
-- Updated return type of `continue_conversation_with_tool_results` to `list[ChatMessage]` (#38)
+- Update `TaskHandler.run_step()` to work with updated `continue_conversation_with_tool_results` (#39)
+- Update return type of `continue_conversation_with_tool_results` to `list[ChatMessage]` (#38)
 
 ### Deleted
 
diff --git a/src/llm_agents_from_scratch/core/task_handler.py b/src/llm_agents_from_scratch/core/task_handler.py
@@ -181,20 +181,26 @@ async def run_step(self, step: TaskStep) -> TaskStepResult:
                 tool_call_results.append(tool_call_result)
 
             # send tool call results back to llm to get result
-            final_response = (
+            new_messages = (
                 await self.llm.continue_conversation_with_tool_results(
                     tool_call_results=tool_call_results,
                     chat_messages=chat_history,
                 )
             )
 
+            # get final content and update chat history
+            final_content = new_messages[-1].content
+            chat_history += new_messages
+        else:
+            final_content = response.content
+
         # augment rollout from this turn
         async with self._lock:
             self.rollout += self._rollout_contribution_from_single_run_step(
-                chat_history=chat_history + [final_response],
+                chat_history=chat_history,
             )
 
         return TaskStepResult(
             task_step=step,
-            content=final_response.content,
+            content=final_content,
         )
diff --git a/tests/test_task_handler.py b/tests/test_task_handler.py
@@ -1,4 +1,5 @@
 import asyncio
+import contextlib
 from unittest.mock import AsyncMock
 
 import pytest
@@ -62,7 +63,15 @@ async def fn() -> None:
 
     handler.background_task = asyncio.create_task(fn())
     with pytest.raises(TaskHandlerError):
-        handler.background_task = asyncio.create_task(fn())
+        new_task = asyncio.create_task(fn())
+        handler.background_task = new_task
+
+    # cleanup
+    handler.background_task.cancel()
+    new_task.cancel()
+    with contextlib.suppress(asyncio.CancelledError):
+        await handler.background_task
+        await new_task
 
 
 @pytest.mark.asyncio
@@ -211,10 +220,25 @@ async def plus_two(arg1: int) -> int:
         tool_calls=tool_calls,
     )
     # continue conversation with tool calls
-    mock_return_value = ChatMessage(
-        role=ChatRole.ASSISTANT,
-        content="The final response.",
-    )
+    mock_return_value = [
+        # tool call results
+        ChatMessage(
+            role=ChatRole.TOOL,
+            content="2",
+        ),
+        ChatMessage(
+            role=ChatRole.TOOL,
+            content="3",
+        ),
+        ChatMessage(
+            role=ChatRole.TOOL,
+            content="error: tool name `plus_three` doesn't exist",
+        ),
+        ChatMessage(
+            role=ChatRole.ASSISTANT,
+            content="The final response.",
+        ),
+    ]
     mock_llm.continue_conversation_with_tool_results.return_value = (
         mock_return_value
     )
@@ -252,3 +276,46 @@ async def plus_two(arg1: int) -> int:
     mock_llm.continue_conversation_with_tool_results.assert_awaited_once()
     assert step_result.task_step == step
     assert step_result.content == "The final response."
+
+
+@pytest.mark.asyncio
+async def test_run_step_without_tool_calls() -> None:
+    """Tests run step."""
+
+    # arrange mocks
+    mock_llm = AsyncMock()
+    mock_llm.chat.return_value = ChatMessage(
+        role=ChatRole.ASSISTANT,
+        content="Initial response.",
+    )
+
+    handler = TaskHandler(
+        task=Task(instruction="mock instruction"),
+        llm=mock_llm,
+        tools=[],
+    )
+
+    # act
+    step = TaskStep(
+        instruction="Some instruction.",
+        last_step=False,
+    )
+    step_result = await handler.run_step(step)
+
+    # assert
+    mock_llm.chat.assert_awaited_once_with(
+        input="Some instruction.",
+        chat_messages=[
+            ChatMessage(
+                role=ChatRole.SYSTEM,
+                content=DEFAULT_SYSTEM_MESSAGE.format(
+                    original_instruction="mock instruction",
+                    current_rollout="",
+                ),
+            ),
+        ],
+        tools=list(handler.tools_registry.keys()),
+    )
+    mock_llm.continue_conversation_with_tool_results.assert_not_awaited()
+    assert step_result.task_step == step
+    assert step_result.content == "Initial response."