Skip to content

Commit 7d78643

Browse files
authored
Merge pull request #1120 from guardrails-ai/ai-proxy-updates
updates for telemetry performance and streaming and server multi node support
2 parents 30eeb4c + 7386fae commit 7d78643

File tree

13 files changed

+124
-31
lines changed

13 files changed

+124
-31
lines changed

guardrails/api_client.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,8 @@ def stream_validate(
104104
)
105105
if line:
106106
json_output = json.loads(line)
107+
if json_output.get("error"):
108+
raise Exception(json_output.get("error").get("message"))
107109
yield IValidationOutcome.from_dict(json_output)
108110

109111
def get_history(self, guard_name: str, call_id: str):

guardrails/async_guard.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -574,13 +574,15 @@ async def _stream_server_call(
574574
validated_output=validated_output,
575575
validation_passed=(validation_output.validation_passed is True),
576576
)
577-
if validation_output:
578-
guard_history = self._api_client.get_history(
579-
self.name, validation_output.call_id
580-
)
581-
self.history.extend(
582-
[Call.from_interface(call) for call in guard_history]
583-
)
577+
# TODO re-enable this once we have a way to get history
578+
# from a multi-node server
579+
# if validation_output:
580+
# guard_history = self._api_client.get_history(
581+
# self.name, validation_output.call_id
582+
# )
583+
# self.history.extend(
584+
# [Call.from_interface(call) for call in guard_history]
585+
# )
584586
else:
585587
raise ValueError("AsyncGuard does not have an api client!")
586588

guardrails/classes/llm/llm_response.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,12 @@ def to_interface(self) -> ILLMResponse:
4747
stream_output = [str(so) for so in copy_2]
4848

4949
async_stream_output = None
50-
if self.async_stream_output:
50+
# don't do this again if it's already aiter-able — we're updating
51+
# ourselves here in memory, so repeating it
52+
# can cause issues
53+
if self.async_stream_output and not hasattr(
54+
self.async_stream_output, "__aiter__"
55+
):
5156
# tee doesn't work with async iterators
5257
# This may be destructive
5358
async_stream_output = []

guardrails/cli/create.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ def generate_template_config(
118118
guard_instantiations = []
119119

120120
for i, guard in enumerate(template["guards"]):
121-
guard_instantiations.append(f"guard{i} = Guard.from_dict(guards[{i}])")
121+
guard_instantiations.append(f"guard{i} = AsyncGuard.from_dict(guards[{i}])")
122122
guard_instantiations = "\n".join(guard_instantiations)
123123
# Interpolate variables
124124
output_content = template_content.format(

guardrails/cli/hub/template_config.py.template

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import json
22
import os
3-
from guardrails import Guard
3+
from guardrails import AsyncGuard, Guard
44
from guardrails.hub import {VALIDATOR_IMPORTS}
55

66
try:

guardrails/guard.py

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1215,10 +1215,12 @@ def _single_server_call(self, *, payload: Dict[str, Any]) -> ValidationOutcome[O
12151215
error="The response from the server was empty!",
12161216
)
12171217

1218-
guard_history = self._api_client.get_history(
1219-
self.name, validation_output.call_id
1220-
)
1221-
self.history.extend([Call.from_interface(call) for call in guard_history])
1218+
# TODO re-enable this when we have history support in
1219+
# multi-node server environments
1220+
# guard_history = self._api_client.get_history(
1221+
# self.name, validation_output.call_id
1222+
# )
1223+
# self.history.extend([Call.from_interface(call) for call in guard_history])
12221224

12231225
validation_summaries = []
12241226
if self.history.last and self.history.last.iterations.last:
@@ -1281,13 +1283,15 @@ def _stream_server_call(
12811283
validated_output=validated_output,
12821284
validation_passed=(validation_output.validation_passed is True),
12831285
)
1284-
if validation_output:
1285-
guard_history = self._api_client.get_history(
1286-
self.name, validation_output.call_id
1287-
)
1288-
self.history.extend(
1289-
[Call.from_interface(call) for call in guard_history]
1290-
)
1286+
1287+
# TODO re-enable this when the server supports multi-node history
1288+
# if validation_output:
1289+
# guard_history = self._api_client.get_history(
1290+
# self.name, validation_output.call_id
1291+
# )
1292+
# self.history.extend(
1293+
# [Call.from_interface(call) for call in guard_history]
1294+
# )
12911295
else:
12921296
raise ValueError("Guard does not have an api client!")
12931297

guardrails/llm_providers.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -498,6 +498,10 @@ def _invoke_llm(
498498
),
499499
)
500500

501+
# these kwargs are Guardrails-only and should not be passed through to LLMs
502+
kwargs.pop("reask_prompt", None)
503+
kwargs.pop("reask_instructions", None)
504+
501505
response = completion(
502506
model=model,
503507
*args,
@@ -1088,6 +1092,10 @@ async def invoke_llm(
10881092
),
10891093
)
10901094

1095+
# these kwargs are Guardrails-only and should not be passed through to LLMs
1096+
kwargs.pop("reask_prompt", None)
1097+
kwargs.pop("reask_instructions", None)
1098+
10911099
response = await acompletion(
10921100
*args,
10931101
**kwargs,

guardrails/run/async_stream_runner.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,7 @@ async def async_step(
148148
_ = self.is_last_chunk(chunk, api)
149149

150150
fragment += chunk_text
151+
151152
results = await validator_service.async_partial_validate(
152153
chunk_text,
153154
self.metadata,
@@ -157,7 +158,8 @@ async def async_step(
157158
"$",
158159
True,
159160
)
160-
validators = self.validation_map["$"] or []
161+
validators = self.validation_map.get("$", [])
162+
161163
# collect the result validated_chunk into validation progress
162164
# per validator
163165
for result in results:
@@ -210,7 +212,7 @@ async def async_step(
210212
validation_progress[validator_log.validator_name] += chunk
211213
# if there is an entry for every validator
212214
# run a merge and emit a validation outcome
213-
if len(validation_progress) == len(validators):
215+
if len(validation_progress) == len(validators) or len(validators) == 0:
214216
if refrain_triggered:
215217
current = ""
216218
else:

guardrails/telemetry/guard_tracing.py

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -145,9 +145,18 @@ def trace_stream_guard(
145145
res = next(result) # type: ignore
146146
# FIXME: This should only be called once;
147147
# Accumulate the validated output and call at the end
148-
add_guard_attributes(guard_span, history, res)
149-
add_user_attributes(guard_span)
150-
yield res
148+
if not guard_span.is_recording():
149+
# Assuming you have a tracer instance
150+
tracer = get_tracer(__name__)
151+
# Create a new span and link it to the previous span
152+
with tracer.start_as_current_span(
153+
"stream_guard_span", # type: ignore
154+
links=[Link(guard_span.get_span_context())],
155+
) as new_span:
156+
guard_span = new_span
157+
add_guard_attributes(guard_span, history, res)
158+
add_user_attributes(guard_span)
159+
yield res
151160
except StopIteration:
152161
next_exists = False
153162

@@ -180,6 +189,7 @@ def trace_guard_execution(
180189
result, ValidationOutcome
181190
):
182191
return trace_stream_guard(guard_span, result, history)
192+
183193
add_guard_attributes(guard_span, history, result)
184194
add_user_attributes(guard_span)
185195
return result
@@ -204,14 +214,14 @@ async def trace_async_stream_guard(
204214
tracer = get_tracer(__name__)
205215
# Create a new span and link it to the previous span
206216
with tracer.start_as_current_span(
207-
"new_guard_span", # type: ignore
217+
"async_stream_span", # type: ignore
208218
links=[Link(guard_span.get_span_context())],
209219
) as new_span:
210220
guard_span = new_span
211221

212222
add_guard_attributes(guard_span, history, res)
213223
add_user_attributes(guard_span)
214-
yield res
224+
yield res
215225
except StopIteration:
216226
next_exists = False
217227
except StopAsyncIteration:

guardrails/telemetry/runner_tracing.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,11 @@ def trace_call_wrapper(*args, **kwargs):
265265
) as call_span:
266266
try:
267267
response = fn(*args, **kwargs)
268+
if isinstance(response, LLMResponse) and (
269+
response.async_stream_output or response.stream_output
270+
):
271+
# TODO: Iterate, add a call attr each time
272+
return response
268273
add_call_attributes(call_span, response, *args, **kwargs)
269274
return response
270275
except Exception as e:

0 commit comments

Comments
 (0)