From 4c1e796121039db74ea91b2515970cc3c0fdb1df Mon Sep 17 00:00:00 2001 From: Pouyanpi <13303554+Pouyanpi@users.noreply.github.com> Date: Thu, 14 Aug 2025 14:55:58 +0200 Subject: [PATCH 01/10] feat: enhance tracing system with OpenTelemetry semantic conventions and configurable span formats Introduces a major enhancement to the NeMo Guardrails tracing and telemetry infrastructure with support for multiple span formats, OpenTelemetry semantic convention compliance, and privacy-focused content capture controls. The system now supports both flat (legacy) and OpenTelemetry-compliant span formats while maintaining backward compatibility. Key changes: - Add configurable span format support (flat/opentelemetry) - Implement OpenTelemetry semantic conventions for GenAI - Add privacy controls for prompt/response content capture - Enhance LLM call tracking with model provider information - Improve span extraction and modeling architecture - Add comprehensive test coverage for new functionality --- nemoguardrails/actions/llm/utils.py | 15 +- nemoguardrails/logging/explain.py | 6 +- nemoguardrails/rails/llm/config.py | 12 + nemoguardrails/rails/llm/llmrails.py | 16 +- nemoguardrails/tracing/__init__.py | 22 +- nemoguardrails/tracing/adapters/base.py | 2 +- .../tracing/adapters/opentelemetry.py | 182 ++++-- nemoguardrails/tracing/constants.py | 211 ++++++ nemoguardrails/tracing/interaction_types.py | 83 +++ nemoguardrails/tracing/span_extractors.py | 482 ++++++++++++++ nemoguardrails/tracing/span_format.py | 85 +++ nemoguardrails/tracing/spans.py | 354 ++++++++++ nemoguardrails/tracing/tracer.py | 18 +- tests/test_opentelemetry_adapter_v2.py | 519 +++++++++++++++ tests/test_span_extractors.py | 239 +++++++ tests/test_span_format_enum.py | 209 ++++++ tests/test_span_models_and_extractors.py | 271 ++++++++ tests/test_span_v2_integration.py | 161 +++++ tests/test_span_v2_otel_semantics.py | 604 ++++++++++++++++++ tests/test_spans.py | 98 +++ tests/test_tracing_adapters_filesystem.py 
| 8 +- tests/test_tracing_adapters_opentelemetry.py | 360 +++++++---- 22 files changed, 3739 insertions(+), 218 deletions(-) create mode 100644 nemoguardrails/tracing/constants.py create mode 100644 nemoguardrails/tracing/interaction_types.py create mode 100644 nemoguardrails/tracing/span_extractors.py create mode 100644 nemoguardrails/tracing/span_format.py create mode 100644 nemoguardrails/tracing/spans.py create mode 100644 tests/test_opentelemetry_adapter_v2.py create mode 100644 tests/test_span_extractors.py create mode 100644 tests/test_span_format_enum.py create mode 100644 tests/test_span_models_and_extractors.py create mode 100644 tests/test_span_v2_integration.py create mode 100644 tests/test_span_v2_otel_semantics.py create mode 100644 tests/test_spans.py diff --git a/nemoguardrails/actions/llm/utils.py b/nemoguardrails/actions/llm/utils.py index e58a1aba5..b1163081b 100644 --- a/nemoguardrails/actions/llm/utils.py +++ b/nemoguardrails/actions/llm/utils.py @@ -66,6 +66,8 @@ def _infer_model_name(llm: BaseLanguageModel): async def llm_call( llm: BaseLanguageModel, prompt: Union[str, List[dict]], + model_name: Optional[str] = None, + model_provider: Optional[str] = None, stop: Optional[List[str]] = None, custom_callback_handlers: Optional[List[AsyncCallbackHandler]] = None, ) -> str: @@ -76,7 +78,8 @@ async def llm_call( llm_call_info = LLMCallInfo() llm_call_info_var.set(llm_call_info) - llm_call_info.llm_model_name = _infer_model_name(llm) + llm_call_info.llm_model_name = model_name or _infer_model_name(llm) + llm_call_info.llm_provider_name = model_provider if custom_callback_handlers and custom_callback_handlers != [None]: all_callbacks = BaseCallbackManager( @@ -172,15 +175,15 @@ def get_colang_history( history += f'user "{event["text"]}"\n' elif event["type"] == "UserIntent": if include_texts: - history += f' {event["intent"]}\n' + history += f" {event['intent']}\n" else: - history += f'user {event["intent"]}\n' + history += f"user 
{event['intent']}\n" elif event["type"] == "BotIntent": # If we have instructions, we add them before the bot message. # But we only do that for the last bot message. if "instructions" in event and idx == last_bot_intent_idx: history += f"# {event['instructions']}\n" - history += f'bot {event["intent"]}\n' + history += f"bot {event['intent']}\n" elif event["type"] == "StartUtteranceBotAction" and include_texts: history += f' "{event["script"]}"\n' # We skip system actions from this log @@ -349,9 +352,9 @@ def flow_to_colang(flow: Union[dict, Flow]) -> str: if "_type" not in element: raise Exception("bla") if element["_type"] == "UserIntent": - colang_flow += f'user {element["intent_name"]}\n' + colang_flow += f"user {element['intent_name']}\n" elif element["_type"] == "run_action" and element["action_name"] == "utter": - colang_flow += f'bot {element["action_params"]["value"]}\n' + colang_flow += f"bot {element['action_params']['value']}\n" return colang_flow diff --git a/nemoguardrails/logging/explain.py b/nemoguardrails/logging/explain.py index f6e3b5bc0..d9c282d15 100644 --- a/nemoguardrails/logging/explain.py +++ b/nemoguardrails/logging/explain.py @@ -59,6 +59,10 @@ class LLMCallInfo(LLMCallSummary): default="unknown", description="The name of the model use for the LLM call.", ) + llm_provider_name: Optional[str] = Field( + default="unknown", + description="The provider of the model used for the LLM call, e.g. 'openai', 'nvidia'.", + ) class ExplainInfo(BaseModel): @@ -100,7 +104,7 @@ def print_llm_calls_summary(self): for i in range(len(self.llm_calls)): llm_call = self.llm_calls[i] msg = ( - f"{i+1}. Task `{llm_call.task}` took {llm_call.duration:.2f} seconds " + f"{i + 1}. Task `{llm_call.task}` took {llm_call.duration:.2f} seconds " + ( f"and used {llm_call.total_tokens} tokens." 
if total_tokens diff --git a/nemoguardrails/rails/llm/config.py b/nemoguardrails/rails/llm/config.py index 91c9f0bae..0136fee58 100644 --- a/nemoguardrails/rails/llm/config.py +++ b/nemoguardrails/rails/llm/config.py @@ -364,6 +364,18 @@ class TracingConfig(BaseModel): default_factory=lambda: [LogAdapterConfig()], description="The list of tracing adapters to use. If not specified, the default adapters are used.", ) + span_format: str = Field( + default="opentelemetry", + description="The span format to use. Options are 'flat' (simple metrics) or 'opentelemetry' (OpenTelemetry semantic conventions).", + ) + enable_content_capture: bool = Field( + default=False, + description=( + "Capture prompts and responses (user/assistant/tool message content) in tracing/telemetry events. " + "Disabled by default for privacy and alignment with OpenTelemetry GenAI semantic conventions. " + "WARNING: Enabling this may include PII and sensitive data in your telemetry backend." + ), + ) class EmbeddingsCacheConfig(BaseModel): diff --git a/nemoguardrails/rails/llm/llmrails.py b/nemoguardrails/rails/llm/llmrails.py index 96db05cda..835f381ec 100644 --- a/nemoguardrails/rails/llm/llmrails.py +++ b/nemoguardrails/rails/llm/llmrails.py @@ -244,6 +244,8 @@ def __init__( from nemoguardrails.tracing import create_log_adapters self._log_adapters = create_log_adapters(config.tracing) + else: + self._log_adapters = None # We run some additional checks on the config self._validate_config() @@ -1167,9 +1169,19 @@ async def generate_async( # lazy import to avoid circular dependency from nemoguardrails.tracing import Tracer - # Create a Tracer instance with instantiated adapters + span_format = getattr( + self.config.tracing, "span_format", "opentelemetry" + ) + enable_content_capture = getattr( + self.config.tracing, "enable_content_capture", False + ) + # Create a Tracer instance with instantiated adapters and span configuration tracer = Tracer( - input=messages, response=res, 
adapters=self._log_adapters + input=messages, + response=res, + adapters=self._log_adapters, + span_format=span_format, + enable_content_capture=enable_content_capture, ) await tracer.export_async() diff --git a/nemoguardrails/tracing/__init__.py b/nemoguardrails/tracing/__init__.py index d99d29e56..97eb81885 100644 --- a/nemoguardrails/tracing/__init__.py +++ b/nemoguardrails/tracing/__init__.py @@ -13,4 +13,24 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .tracer import InteractionLog, Tracer, create_log_adapters +from .interaction_types import InteractionLog, InteractionOutput +from .span_extractors import ( + SpanExtractor, + SpanExtractorV1, + SpanExtractorV2, + create_span_extractor, +) +from .spans import SpanEvent, SpanFlat, SpanOpentelemetry +from .tracer import Tracer, create_log_adapters + +__all__ = [ + "SpanExtractor", + "SpanExtractorV1", + "SpanExtractorV2", + "create_span_extractor", + "Tracer", + "create_log_adapters", + "SpanEvent", + "SpanFlat", + "SpanOpentelemetry", +] diff --git a/nemoguardrails/tracing/adapters/base.py b/nemoguardrails/tracing/adapters/base.py index 6c355b0f3..5b4a2ad04 100644 --- a/nemoguardrails/tracing/adapters/base.py +++ b/nemoguardrails/tracing/adapters/base.py @@ -16,7 +16,7 @@ from abc import ABC, abstractmethod from typing import Optional -from nemoguardrails.eval.models import InteractionLog +from nemoguardrails.tracing.interaction_types import InteractionLog class InteractionLogAdapter(ABC): diff --git a/nemoguardrails/tracing/adapters/opentelemetry.py b/nemoguardrails/tracing/adapters/opentelemetry.py index 6044b3cfe..3dbdd7603 100644 --- a/nemoguardrails/tracing/adapters/opentelemetry.py +++ b/nemoguardrails/tracing/adapters/opentelemetry.py @@ -55,13 +55,13 @@ import warnings from importlib.metadata import version -from typing import TYPE_CHECKING, Optional, Type +from typing import TYPE_CHECKING, Any, Dict if TYPE_CHECKING: from nemoguardrails.tracing import
InteractionLog try: - from opentelemetry import trace - from opentelemetry.trace import NoOpTracerProvider + from opentelemetry import trace # type: ignore + from opentelemetry.trace import NoOpTracerProvider # type: ignore except ImportError: raise ImportError( @@ -70,34 +70,7 @@ ) from nemoguardrails.tracing.adapters.base import InteractionLogAdapter - -# DEPRECATED: global dictionary to store registered exporters -# will be removed in v0.16.0 -_exporter_name_cls_map: dict[str, Type] = {} - - -def register_otel_exporter(name: str, exporter_cls: Type): - """Register a new exporter. - - Args: - name: The name to register the exporter under. - exporter_cls: The exporter class to register. - - Deprecated: - This function is deprecated and will be removed in version 0.16.0. - Please configure OpenTelemetry exporters directly in your application code. - See the migration guide at: - https://github.com/NVIDIA/NeMo-Guardrails/blob/main/examples/configs/tracing/README.md#migration-guide - """ - warnings.warn( - "register_otel_exporter is deprecated and will be removed in version 0.16.0. " - "Please configure OpenTelemetry exporters directly in your application code. " - "See the migration guide at: " - "https://github.com/NVIDIA/NeMo-Guardrails/blob/develop/examples/configs/tracing/README.md#migration-guide", - DeprecationWarning, - stacklevel=2, - ) - _exporter_name_cls_map[name] = exporter_cls +from nemoguardrails.tracing.spans import is_typed_span class OpenTelemetryAdapter(InteractionLogAdapter): @@ -114,40 +87,20 @@ class OpenTelemetryAdapter(InteractionLogAdapter): def __init__( self, service_name: str = "nemo_guardrails", - **kwargs, ): """ Initialize the OpenTelemetry adapter. Args: service_name: Service name for instrumentation scope (not used for resource) - **kwargs: Additional arguments (for backward compatibility) Note: Applications must configure the OpenTelemetry SDK before using this adapter. The adapter will use the globally configured tracer provider. 
""" - # check for deprecated parameters and warn users - deprecated_params = [ - "exporter", - "exporter_cls", - "resource_attributes", - "span_processor", - ] - used_deprecated = [param for param in deprecated_params if param in kwargs] - - if used_deprecated: - warnings.warn( - f"OpenTelemetry configuration parameters {used_deprecated} in YAML/config are deprecated " - "and will be ignored. Please configure OpenTelemetry in your application code. " - "See the migration guide at: " - "https://github.com/NVIDIA/NeMo-Guardrails/blob/main/examples/configs/tracing/README.md#migration-guide", - DeprecationWarning, - stacklevel=2, - ) # validate that OpenTelemetry is properly configured - provider = trace.get_tracer_provider() + provider = trace.get_tracer_provider() # type: ignore if provider is None or isinstance(provider, NoOpTracerProvider): warnings.warn( "No OpenTelemetry TracerProvider configured. Traces will not be exported. " @@ -158,7 +111,7 @@ def __init__( stacklevel=2, ) - self.tracer = trace.get_tracer( + self.tracer = trace.get_tracer( # type: ignore service_name, instrumenting_library_version=version("nemoguardrails"), schema_url="https://opentelemetry.io/schemas/1.26.0", @@ -166,10 +119,16 @@ def __init__( def transform(self, interaction_log: "InteractionLog"): """Transforms the InteractionLog into OpenTelemetry spans.""" - spans = {} + # get the actual interaction start time from the first rail + # all span times are relative offsets from this timestamp + base_time_ns = _get_base_time_ns(interaction_log) + + spans: Dict[str, Any] = {} for span_data in interaction_log.trace: - parent_span = spans.get(span_data.parent_id) + parent_span = ( + spans.get(span_data.parent_id) if span_data.parent_id else None + ) parent_context = ( trace.set_span_in_context(parent_span) if parent_span else None ) @@ -178,14 +137,21 @@ def transform(self, interaction_log: "InteractionLog"): span_data, parent_context, spans, - interaction_log.id, # trace_id + base_time_ns, ) 
async def transform_async(self, interaction_log: "InteractionLog"): """Transforms the InteractionLog into OpenTelemetry spans asynchronously.""" - spans = {} + # get the actual interaction start time from the first rail + # all span times are relative offsets from this timestamp + base_time_ns = _get_base_time_ns(interaction_log) + + spans: Dict[str, Any] = {} + for span_data in interaction_log.trace: - parent_span = spans.get(span_data.parent_id) + parent_span = ( + spans.get(span_data.parent_id) if span_data.parent_id else None + ) parent_context = ( trace.set_span_in_context(parent_span) if parent_span else None ) @@ -193,7 +159,7 @@ async def transform_async(self, interaction_log: "InteractionLog"): span_data, parent_context, spans, - interaction_log.id, # trace_id + base_time_ns, ) def _create_span( @@ -201,19 +167,99 @@ def _create_span( span_data, parent_context, spans, - trace_id, + base_time_ns, ): - with self.tracer.start_as_current_span( + """Create OTel span from a fully-formed SpanOpentelemetry or typed span object. + + This is a pure API bridge - all semantic attributes are already + set by the extractor. We only handle: + 1. Timestamp conversion (relative to absolute) + 2. Span kind mapping (string to enum) + 3. 
API calls to create spans and events + """ + # convert relative times to absolute timestamps + # the span times are relative offsets from the start of the trace + # base_time_ns represents the start time of the trace + # we simply add the relative offsets to get absolute times + relative_start_ns = int(span_data.start_time * 1_000_000_000) + relative_end_ns = int(span_data.end_time * 1_000_000_000) + + start_time_ns = base_time_ns + relative_start_ns + end_time_ns = base_time_ns + relative_end_ns + + if is_typed_span(span_data): + attributes = span_data.to_otel_attributes() + else: + attributes = {} + + from opentelemetry.trace import SpanKind as OTelSpanKind + + span_kind_map = { + "server": OTelSpanKind.SERVER, + "client": OTelSpanKind.CLIENT, + "internal": OTelSpanKind.INTERNAL, + } + + span_kind_str = attributes.get("span.kind", "internal") + otel_span_kind = span_kind_map.get(span_kind_str, OTelSpanKind.INTERNAL) + + span = self.tracer.start_span( span_data.name, context=parent_context, - ) as span: + start_time=start_time_ns, + kind=otel_span_kind, + ) + + if attributes: + for key, value in attributes.items(): + if key == "span.kind": + continue + span.set_attribute(key, value) + + # for V1 compatibility, also set metrics as attributes + if hasattr(span_data, "metrics") and span_data.metrics: for key, value in span_data.metrics.items(): span.set_attribute(key, value) - span.set_attribute("span_id", span_data.span_id) - span.set_attribute("trace_id", trace_id) - span.set_attribute("start_time", span_data.start_time) - span.set_attribute("end_time", span_data.end_time) - span.set_attribute("duration", span_data.duration) + if hasattr(span_data, "events") and span_data.events: + for event in span_data.events: + relative_event_ns = int(event.timestamp * 1_000_000_000) + event_time_ns = base_time_ns + relative_event_ns + + event_attrs = event.attributes.copy() if event.attributes else {} + + if event.body and isinstance(event.body, dict): + # merge body content 
into attributes for OTel compatibility + # (OTel events don't have separate body, just attributes) + for body_key, body_value in event.body.items(): + if body_key not in event_attrs: + event_attrs[body_key] = body_value + + span.add_event( + name=event.name, attributes=event_attrs, timestamp=event_time_ns + ) + + spans[span_data.span_id] = span + + span.end(end_time=end_time_ns) + + +def _get_base_time_ns(interaction_log: "InteractionLog") -> int: + """Get the base time in nanoseconds for tracing spans. + + Args: + interaction_log: The interaction log containing rail timing information + + Returns: + Base time in nanoseconds, either from the first activated rail or current time + """ + if ( + interaction_log.activated_rails + and interaction_log.activated_rails[0].started_at + ): + return int(interaction_log.activated_rails[0].started_at * 1_000_000_000) + else: + # This shouldn't happen in normal operation, but provide a fallback + import time - spans[span_data.span_id] = span + return time.time_ns() diff --git a/nemoguardrails/tracing/constants.py b/nemoguardrails/tracing/constants.py new file mode 100644 index 000000000..3e0bf3179 --- /dev/null +++ b/nemoguardrails/tracing/constants.py @@ -0,0 +1,211 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+ +"""OpenTelemetry constants and semantic conventions for NeMo Guardrails.""" + + +class SpanKind: + """String constants for span kinds.""" + + SERVER = "server" + CLIENT = "client" + INTERNAL = "internal" + + +class SpanTypes: + """Internal span type identifiers used in span mapping. + + These are internal identifiers used to categorize spans before mapping + to actual span names. They represent the type of operation being traced. + + Note: 'llm_call' maps to various GenAI semantic convention span types + like inference (gen_ai.inference.client), embeddings, etc. + """ + + # NeMo Guardrails-specific internal types + INTERACTION = "interaction" # Entry point to guardrails + RAIL = "rail" # Rail execution + ACTION = "action" # Action execution + + # GenAI-related type (maps to official semantic conventions) + LLM_CALL = "llm_call" # maps to gen_ai.inference.client + + # NOTE: might use more specific types in the future + # could add more specific types that align with semantic conventions: + # INFERENCE = "inference" # for gen_ai.inference.client spans + # EMBEDDING = "embedding" # for gen_ai.embeddings.client spans + + +class SpanNamePatterns: + """Patterns used for identifying span types from span names.""" + + # patterns that indicate SERVER spans + INTERACTION = "interaction" + GUARDRAILS_REQUEST_PATTERN = "guardrails.request" + + # patterns that indicate CLIENT spans + GEN_AI_PREFIX = "gen_ai." + LLM = "llm" + COMPLETION = "completion" + + +class SystemConstants: + """System-level constants for NeMo Guardrails.""" + + SYSTEM_NAME = "nemo-guardrails" + UNKNOWN = "unknown" + + +class GenAIAttributes: + """GenAI semantic convention attributes following the draft specification. + + Note: These are based on the experimental OpenTelemetry GenAI semantic conventions + since they are not yet available in the stable semantic conventions package. 
+ + See: https://opentelemetry.io/docs/specs/semconv/gen-ai/ + """ + + GEN_AI_SYSTEM = "gen_ai.system" # @deprecated + + GEN_AI_PROVIDER_NAME = "gen_ai.provider.name" + GEN_AI_OPERATION_NAME = "gen_ai.operation.name" + + GEN_AI_REQUEST_MODEL = "gen_ai.request.model" + GEN_AI_REQUEST_MAX_TOKENS = "gen_ai.request.max_tokens" + GEN_AI_REQUEST_TEMPERATURE = "gen_ai.request.temperature" + GEN_AI_REQUEST_TOP_P = "gen_ai.request.top_p" + GEN_AI_REQUEST_TOP_K = "gen_ai.request.top_k" + GEN_AI_REQUEST_FREQUENCY_PENALTY = "gen_ai.request.frequency_penalty" + GEN_AI_REQUEST_PRESENCE_PENALTY = "gen_ai.request.presence_penalty" + GEN_AI_REQUEST_STOP_SEQUENCES = "gen_ai.request.stop_sequences" + + GEN_AI_RESPONSE_MODEL = "gen_ai.response.model" + GEN_AI_RESPONSE_ID = "gen_ai.response.id" + GEN_AI_RESPONSE_FINISH_REASONS = "gen_ai.response.finish_reasons" + + GEN_AI_USAGE_INPUT_TOKENS = "gen_ai.usage.input_tokens" + GEN_AI_USAGE_OUTPUT_TOKENS = "gen_ai.usage.output_tokens" + GEN_AI_USAGE_TOTAL_TOKENS = "gen_ai.usage.total_tokens" + + +class CommonAttributes: + """Common OpenTelemetry attributes used across spans.""" + + SPAN_KIND = "span.kind" + + +class GuardrailsAttributes: + """NeMo Guardrails-specific attributes for spans.""" + + # rail attributes + RAIL_TYPE = "rail.type" + RAIL_NAME = "rail.name" + RAIL_STOP = "rail.stop" + RAIL_DECISIONS = "rail.decisions" + + # action attributes + ACTION_NAME = "action.name" + ACTION_HAS_LLM_CALLS = "action.has_llm_calls" + ACTION_LLM_CALLS_COUNT = "action.llm_calls_count" + ACTION_PARAM_PREFIX = "action.param." # For dynamic action parameters + + +class SpanNames: + """Standard span names following OpenTelemetry GenAI semantic conventions. + + Based on: https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/ + + IMPORTANT: Span names must be low cardinality to avoid performance issues. + Variable/high cardinality data (like specific rail types, model names, etc.) + should go in attributes instead of the span name. 
+ """ + + # server spans (entry points); NeMo Guardrails specific + GUARDRAILS_REQUEST = "guardrails.request" # Entry point for guardrails processing + + # internal spans; NeMo Guardrails specific + GUARDRAILS_RAIL = "guardrails.rail" # Use attributes for rail type/name + GUARDRAILS_ACTION = "guardrails.action" # Use attributes for action name + + # client spans (LLM calls), following official GenAI semantic conventions + # "Span name SHOULD be `{gen_ai.operation.name} {gen_ai.request.model}`" + # since model names are high cardinality, we'll build these dynamically + # these are fallback operation names when model is unknown + GEN_AI_COMPLETION = "completion" + GEN_AI_CHAT = "chat" + GEN_AI_EMBEDDING = "embedding" + + +class OperationNames: + """Standard operation names for GenAI semantic conventions. + + Note: This only defines standard LLM operations. Custom actions and tasks + should be passed through as-is since they are dynamic and user-defined. + """ + + # standard LLM operations (from GenAI semantic conventions) + COMPLETION = "completion" + CHAT = "chat" + EMBEDDING = "embedding" + + # default operation for guardrails interactions + GUARDRAILS = "guardrails" + + +class EventNames: + """Standard event names for OpenTelemetry GenAI semantic conventions. + + Based on official spec at: + https://github.com/open-telemetry/semantic-conventions/blob/main/model/gen-ai/events.yaml + """ + + GEN_AI_SYSTEM_MESSAGE = "gen_ai.system.message" + GEN_AI_USER_MESSAGE = "gen_ai.user.message" + GEN_AI_ASSISTANT_MESSAGE = "gen_ai.assistant.message" + # GEN_AI_TOOL_MESSAGE = "gen_ai.tool.message" + + GEN_AI_CHOICE = "gen_ai.choice" + + GEN_AI_CONTENT_PROMPT = "gen_ai.content.prompt" # @deprecated in favor of GEN_AI_USER_MESSAGE; kept in use while we still rely on text completions. + GEN_AI_CONTENT_COMPLETION = "gen_ai.content.completion" # @deprecated in favor of GEN_AI_ASSISTANT_MESSAGE; kept in use while we still rely on text completions.
+ + +class GuardrailsEventNames: + """NeMo Guardrails-specific event names (not OTel GenAI conventions). + + These events represent internal guardrails state changes, not LLM API calls. + They use a guardrails-specific namespace to avoid confusion with OTel GenAI semantic conventions. + """ + + UTTERANCE_USER_FINISHED = "guardrails.utterance.user.finished" + UTTERANCE_BOT_STARTED = "guardrails.utterance.bot.started" + UTTERANCE_BOT_FINISHED = "guardrails.utterance.bot.finished" + + USER_MESSAGE = "guardrails.user_message" + + +class GuardrailsEventTypes: + """NeMo Guardrails internal event type constants. + + These are the type values from internal guardrails events. + """ + + UTTERANCE_USER_ACTION_FINISHED = "UtteranceUserActionFinished" + USER_MESSAGE = "UserMessage" + + START_UTTERANCE_BOT_ACTION = "StartUtteranceBotAction" + UTTERANCE_BOT_ACTION_FINISHED = "UtteranceBotActionFinished" + + SYSTEM_MESSAGE = "SystemMessage" diff --git a/nemoguardrails/tracing/interaction_types.py b/nemoguardrails/tracing/interaction_types.py new file mode 100644 index 000000000..ca8f658ed --- /dev/null +++ b/nemoguardrails/tracing/interaction_types.py @@ -0,0 +1,83 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Core models for the tracing system.""" + +from typing import Any, List, Optional, Union + +from pydantic import BaseModel, Field + +from nemoguardrails.rails.llm.options import ActivatedRail, GenerationLog +from nemoguardrails.tracing.span_extractors import SpanExtractor, create_span_extractor +from nemoguardrails.tracing.spans import SpanFlat, SpanOpentelemetry + + +class InteractionLog(BaseModel): + """Detailed log about the execution of an interaction.""" + + id: str = Field(description="A human readable id of the interaction.") + + activated_rails: List[ActivatedRail] = Field( + default_factory=list, description="Details about the activated rails." + ) + events: List[dict] = Field( + default_factory=list, + description="The full list of events recorded during the interaction.", + ) + trace: List[Union[SpanFlat, SpanOpentelemetry]] = Field( + default_factory=list, description="Detailed information about the execution." + ) + + +class InteractionOutput(BaseModel): + """Simple model for interaction output used in tracer.""" + + id: str = Field(description="A human readable id of the interaction.") + input: Any = Field(description="The input for the interaction.") + output: Optional[Any] = Field( + default=None, description="The output of the interaction." + ) + + +def extract_interaction_log( + interaction_output: InteractionOutput, + generation_log: GenerationLog, + span_format: str = "opentelemetry", + enable_content_capture: bool = False, +) -> InteractionLog: + """Extracts an `InteractionLog` object from a `GenerationLog` object.
+ + Args: + interaction_output: The interaction output + generation_log: The generation log + span_format: Span format to use ("flat" or "opentelemetry") + enable_content_capture: Whether to include content in trace events + """ + internal_events = generation_log.internal_events + + span_extractor: SpanExtractor = create_span_extractor( + span_format=span_format, + events=internal_events, + enable_content_capture=enable_content_capture, + ) + + spans = span_extractor.extract_spans(generation_log.activated_rails) + + return InteractionLog( + id=interaction_output.id, + activated_rails=generation_log.activated_rails, + events=generation_log.internal_events, + trace=spans, + ) diff --git a/nemoguardrails/tracing/span_extractors.py b/nemoguardrails/tracing/span_extractors.py new file mode 100644 index 000000000..3d31b9229 --- /dev/null +++ b/nemoguardrails/tracing/span_extractors.py @@ -0,0 +1,482 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Span extraction logic for different span versions.""" + +from abc import ABC, abstractmethod +from typing import List, Optional, Union + +from nemoguardrails.rails.llm.options import ActivatedRail +from nemoguardrails.tracing.constants import ( + EventNames, + GuardrailsEventNames, + GuardrailsEventTypes, + OperationNames, + SpanNames, + SpanTypes, + SystemConstants, +) +from nemoguardrails.tracing.spans import ( + ActionSpan, + InteractionSpan, + LLMSpan, + RailSpan, + SpanEvent, + SpanFlat, + SpanOpentelemetry, + TypedSpan, +) +from nemoguardrails.utils import new_uuid + + +class SpanExtractor(ABC): + """Base class for span extractors.""" + + @abstractmethod + def extract_spans( + self, activated_rails: List[ActivatedRail] + ) -> List[Union[SpanFlat, SpanOpentelemetry]]: + """Extract spans from activated rails.""" + ... + + +class SpanExtractorV1(SpanExtractor): + """Extract v1 spans (legacy format).""" + + def extract_spans( + self, activated_rails: List[ActivatedRail] + ) -> List[Union[SpanFlat, SpanOpentelemetry]]: + """Extract v1 spans from activated rails.""" + spans: List[SpanFlat] = [] + if not activated_rails: + return spans + + ref_time = activated_rails[0].started_at or 0.0 + + # Create interaction span + interaction_span = SpanFlat( + span_id=new_uuid(), + name=SpanTypes.INTERACTION, # V1 uses legacy naming + start_time=(activated_rails[0].started_at or 0.0) - ref_time, + end_time=(activated_rails[-1].finished_at or 0.0) - ref_time, + duration=(activated_rails[-1].finished_at or 0.0) + - (activated_rails[0].started_at or 0.0), + ) + + interaction_span.metrics.update( + { + "interaction_total": 1, + "interaction_seconds_avg": interaction_span.duration, + "interaction_seconds_total": interaction_span.duration, + } + ) + spans.append(interaction_span) + + # Process rails and actions + for activated_rail in activated_rails: + rail_span = SpanFlat( + span_id=new_uuid(), + name="rail: " + activated_rail.name, + parent_id=interaction_span.span_id, + 
start_time=(activated_rail.started_at or 0.0) - ref_time, + end_time=(activated_rail.finished_at or 0.0) - ref_time, + duration=activated_rail.duration or 0.0, + ) + spans.append(rail_span) + + for action in activated_rail.executed_actions: + action_span = SpanFlat( + span_id=new_uuid(), + name="action: " + action.action_name, + parent_id=rail_span.span_id, + start_time=(action.started_at or 0.0) - ref_time, + end_time=(action.finished_at or 0.0) - ref_time, + duration=action.duration or 0.0, + ) + + base_metric_name = f"action_{action.action_name}" + action_span.metrics.update( + { + f"{base_metric_name}_total": 1, + f"{base_metric_name}_seconds_avg": action.duration or 0.0, + f"{base_metric_name}_seconds_total": action.duration or 0.0, + } + ) + spans.append(action_span) + + # Process LLM calls + for llm_call in action.llm_calls: + model_name = llm_call.llm_model_name or SystemConstants.UNKNOWN + llm_span = SpanFlat( + span_id=new_uuid(), + name="LLM: " + model_name, + parent_id=action_span.span_id, + start_time=(llm_call.started_at or 0.0) - ref_time, + end_time=(llm_call.finished_at or 0.0) - ref_time, + duration=llm_call.duration or 0.0, + ) + + base_metric_name = f"llm_call_{model_name.replace('/', '_')}" + llm_span.metrics.update( + { + f"{base_metric_name}_total": 1, + f"{base_metric_name}_seconds_avg": llm_call.duration or 0.0, + f"{base_metric_name}_seconds_total": llm_call.duration + or 0.0, + f"{base_metric_name}_prompt_tokens_total": llm_call.prompt_tokens + or 0, + f"{base_metric_name}_completion_tokens_total": llm_call.completion_tokens + or 0, + f"{base_metric_name}_tokens_total": llm_call.total_tokens + or 0, + } + ) + spans.append(llm_span) + + return spans + + +class SpanExtractorV2(SpanExtractor): + """Extract v2 spans with OpenTelemetry semantic conventions.""" + + def __init__( + self, events: Optional[List[dict]] = None, enable_content_capture: bool = False + ): + """Initialize with optional events for extracting user/bot messages. 
+ + Args: + events: Internal events from InteractionLog + enable_content_capture: Whether to include potentially sensitive content in events + """ + self.internal_events = events or [] + self.enable_content_capture = enable_content_capture + + def extract_spans( + self, activated_rails: List[ActivatedRail] + ) -> List[Union[SpanFlat, SpanOpentelemetry, TypedSpan]]: + """Extract v2 spans from activated rails with OpenTelemetry attributes.""" + spans: List[TypedSpan] = [] + ref_time = activated_rails[0].started_at or 0.0 + + interaction_span = InteractionSpan( + span_id=new_uuid(), + name=SpanNames.GUARDRAILS_REQUEST, + start_time=(activated_rails[0].started_at or 0.0) - ref_time, + end_time=(activated_rails[-1].finished_at or 0.0) - ref_time, + duration=(activated_rails[-1].finished_at or 0.0) + - (activated_rails[0].started_at or 0.0), + operation_name=OperationNames.GUARDRAILS, + service_name=SystemConstants.SYSTEM_NAME, + ) + spans.append(interaction_span) + + for activated_rail in activated_rails: + # Create typed RailSpan + rail_span = RailSpan( + span_id=new_uuid(), + name=SpanNames.GUARDRAILS_RAIL, # Low-cardinality name + parent_id=interaction_span.span_id, + start_time=(activated_rail.started_at or 0.0) - ref_time, + end_time=(activated_rail.finished_at or 0.0) - ref_time, + duration=activated_rail.duration or 0.0, + rail_type=activated_rail.type, + rail_name=activated_rail.name, + rail_stop=( + activated_rail.stop if activated_rail.stop is not None else None + ), + rail_decisions=( + activated_rail.decisions if activated_rail.decisions else None + ), + ) + spans.append(rail_span) + + for action in activated_rail.executed_actions: + # Create typed ActionSpan + action_span = ActionSpan( + span_id=new_uuid(), + name=SpanNames.GUARDRAILS_ACTION, + parent_id=rail_span.span_id, + start_time=(action.started_at or 0.0) - ref_time, + end_time=(action.finished_at or 0.0) - ref_time, + duration=action.duration or 0.0, + action_name=action.action_name, + 
has_llm_calls=len(action.llm_calls) > 0, + llm_calls_count=len(action.llm_calls), + action_params={ + k: v + for k, v in (action.action_params or {}).items() + if isinstance(v, (str, int, float, bool)) + }, + error=True if hasattr(action, "error") and action.error else None, + error_type=( + type(action.error).__name__ + if hasattr(action, "error") and action.error + else None + ), + error_message=( + str(action.error) + if hasattr(action, "error") and action.error + else None + ), + ) + spans.append(action_span) + + for llm_call in action.llm_calls: + model_name = llm_call.llm_model_name or SystemConstants.UNKNOWN + + provider_name = ( + llm_call.llm_provider_name or SystemConstants.UNKNOWN + ) + + # use the specific task name as operation name (custom operation) + # this provides better observability for NeMo Guardrails specific tasks + operation_name = llm_call.task or OperationNames.COMPLETION + + # follow OpenTelemetry convention: span name = "{operation} {model}" + span_name = f"{operation_name} {model_name}" + + # extract request parameters from raw_response if available + temperature = None + max_tokens = None + top_p = None + response_id = None + finish_reasons = None + + if llm_call.raw_response: + response_id = llm_call.raw_response.get("id") + finish_reasons = self._extract_finish_reasons( + llm_call.raw_response + ) + temperature = llm_call.raw_response.get("temperature") + max_tokens = llm_call.raw_response.get("max_tokens") + top_p = llm_call.raw_response.get("top_p") + + llm_span = LLMSpan( + span_id=new_uuid(), + name=span_name, + parent_id=action_span.span_id, + start_time=(llm_call.started_at or 0.0) - ref_time, + end_time=(llm_call.finished_at or 0.0) - ref_time, + duration=llm_call.duration or 0.0, + provider_name=provider_name, + request_model=model_name, + response_model=model_name, + operation_name=operation_name, + usage_input_tokens=llm_call.prompt_tokens, + usage_output_tokens=llm_call.completion_tokens, + 
usage_total_tokens=llm_call.total_tokens, + temperature=temperature, + max_tokens=max_tokens, + top_p=top_p, + response_id=response_id, + response_finish_reasons=finish_reasons, + # TODO: add error to LLMCallInfo for future release + # error=( + # True + # if hasattr(llm_call, "error") and llm_call.error + # else None + # ), + # error_type=( + # type(llm_call.error).__name__ + # if hasattr(llm_call, "error") and llm_call.error + # else None + # ), + # error_message=( + # str(llm_call.error) + # if hasattr(llm_call, "error") and llm_call.error + # else None + # ), + ) + + llm_events = self._extract_llm_events(llm_call, llm_span.start_time) + llm_span.events.extend(llm_events) + + spans.append(llm_span) + + # Add conversation events to the interaction span + if self.internal_events: + interaction_events = self._extract_conversation_events(ref_time) + interaction_span.events.extend(interaction_events) + + return spans + + def _extract_llm_events(self, llm_call, start_time: float) -> List[SpanEvent]: + """Extract OpenTelemetry GenAI message events from an LLM call.""" + events = [] + + # TODO: Update to use newer gen_ai.user.message and gen_ai.assistant.message events + # Currently using deprecated gen_ai.content.prompt and gen_ai.content.completion for simplicity + if llm_call.prompt: + # per OTel spec: content should NOT be captured by default + body = {"content": llm_call.prompt} if self.enable_content_capture else {} + events.append( + SpanEvent( + name=EventNames.GEN_AI_CONTENT_PROMPT, + timestamp=start_time, + body=body, + ) + ) + + if llm_call.completion: + # per OTel spec: content should NOT be captured by default + body = ( + {"content": llm_call.completion} if self.enable_content_capture else {} + ) + events.append( + SpanEvent( + name=EventNames.GEN_AI_CONTENT_COMPLETION, + timestamp=start_time + (llm_call.duration or 0), + body=body, + ) + ) + + return events + + def _extract_conversation_events(self, ref_time: float) -> List[SpanEvent]: + """Extract 
guardrails-specific conversation events from internal events. + + NOTE: These are NeMo Guardrails internal events, NOT OpenTelemetry GenAI events. + We use guardrails-specific namespacing to avoid confusion with OTel GenAI semantic conventions. + """ + events = [] + + for event in self.internal_events: + event_type = event.get("type", "") + body = dict() + event_timestamp = self._get_event_timestamp(event, ref_time) + + if event_type == GuardrailsEventTypes.UTTERANCE_USER_ACTION_FINISHED: + if self.enable_content_capture: + body["content"] = event.get("final_transcript", "") + body["type"] = event_type + events.append( + SpanEvent( + name=GuardrailsEventNames.UTTERANCE_USER_FINISHED, + timestamp=event_timestamp, + body=body, + ) + ) + + elif event_type == GuardrailsEventTypes.USER_MESSAGE: + if self.enable_content_capture: + body["content"] = event.get("text", "") + body["type"] = event_type + events.append( + SpanEvent( + name=GuardrailsEventNames.USER_MESSAGE, + timestamp=event_timestamp, + body=body, + ) + ) + + elif event_type == GuardrailsEventTypes.START_UTTERANCE_BOT_ACTION: + if self.enable_content_capture: + body["content"] = event.get("script", "") + body["type"] = event_type + events.append( + SpanEvent( + name=GuardrailsEventNames.UTTERANCE_BOT_STARTED, + timestamp=event_timestamp, + body=body, + ) + ) + elif event_type == GuardrailsEventTypes.UTTERANCE_BOT_ACTION_FINISHED: + if self.enable_content_capture: + body["content"] = event.get("final_script", "") + body["type"] = event_type + body["is_success"] = event.get("is_success", True) + events.append( + SpanEvent( + name=GuardrailsEventNames.UTTERANCE_BOT_FINISHED, + timestamp=event_timestamp, + body=body, + ) + ) + + return events + + def _get_event_timestamp(self, event: dict, ref_time: float) -> float: + """Extract timestamp from event or use reference time. 
+ + Args: + event: The internal event dictionary + ref_time: Reference time to use as fallback (trace start time) + + Returns: + Timestamp in seconds relative to trace start + """ + event_created_at = event.get("event_created_at") + if event_created_at: + try: + from datetime import datetime + + dt = datetime.fromisoformat(event_created_at) + absolute_timestamp = dt.timestamp() + return absolute_timestamp - ref_time + except (ValueError, AttributeError): + pass + + # fallback: use reference time (event at start of trace) + return 0.0 + + def _extract_finish_reasons(self, raw_response: dict) -> Optional[List[str]]: + """Extract finish reasons from raw LLM response.""" + if not raw_response: + return None + + finish_reasons = [] + + if "finish_reason" in raw_response: + finish_reasons.append(raw_response["finish_reason"]) + + if not finish_reasons and raw_response: + finish_reasons = ["stop"] + + return finish_reasons if finish_reasons else None + + +from nemoguardrails.tracing.span_format import SpanFormat, validate_span_format + + +def create_span_extractor( + span_format: str = "flat", + events: Optional[List[dict]] = None, + enable_content_capture: bool = True, +) -> SpanExtractor: + """Create a span extractor based on format and configuration. 
class SpanFormat(str, Enum):
    """Supported span formats for tracing.

    Inherits from str to allow direct string comparison and JSON serialization.
    """

    # flat structure with metrics dictionary (simple, minimal overhead)
    FLAT = "flat"

    # OpenTelemetry Semantic Conventions compliant format
    # see https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-events/
    OPENTELEMETRY = "opentelemetry"

    @classmethod
    def from_string(cls, value: str) -> "SpanFormat":
        """Create SpanFormat from string value (case-insensitive).

        Args:
            value: String representation of span format

        Returns:
            SpanFormat enum value

        Raises:
            ValueError: If value is not a valid span format
        """
        try:
            return cls(value.lower())
        except ValueError:
            valid_formats = ", ".join(f.value for f in cls)
            # ``from None`` hides the enum's own lookup error so callers see
            # a single, readable message instead of a chained traceback.
            raise ValueError(
                f"Invalid span format: '{value}'. "
                f"Valid formats are: {valid_formats}"
            ) from None

    def __str__(self) -> str:
        """Return string value for use in configs."""
        return self.value


# Type alias for function signatures
SpanFormatType = Union[SpanFormat, Literal["flat", "opentelemetry"], str]


def validate_span_format(value: SpanFormatType) -> SpanFormat:
    """Validate and convert span format value to SpanFormat enum.

    Args:
        value: Span format as enum, literal, or string

    Returns:
        Validated SpanFormat enum value

    Raises:
        ValueError: If value is a string that is not a valid span format
        TypeError: If value is neither a string nor a SpanFormat
    """
    if isinstance(value, SpanFormat):
        return value
    if isinstance(value, str):
        return SpanFormat.from_string(value)
    raise TypeError(
        f"Span format must be a string or SpanFormat enum, got {type(value)}"
    )
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Final, Literal + +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
class SpanFlat(BaseModel):
    """Simple span model (v1) for basic tracing.

    A flat record: identity, optional parent link, timing, and a free-form
    numeric ``metrics`` dictionary. It has no typed attributes or events.
    """

    # Identity and hierarchy. ``parent_id`` defaults to None.
    span_id: str = Field(description="The id of the span.")
    name: str = Field(description="A human-readable name for the span.")
    parent_id: Optional[str] = Field(
        default=None, description="The id of the parent span."
    )
    resource_id: Optional[str] = Field(
        default=None, description="The id of the resource."
    )
    # Timing. NOTE(review): the flat extractor in this change passes times
    # relative to the first rail's start - confirm for other producers.
    start_time: float = Field(description="The start time of the span.")
    end_time: float = Field(description="The end time of the span.")
    duration: float = Field(description="The duration of the span in seconds.")
    # Numeric metrics keyed by name; each instance gets its own empty dict.
    metrics: Dict[str, Union[int, float]] = Field(
        default_factory=dict, description="The metrics recorded during the span."
    )
class InteractionSpan(BaseSpan):
    """Root span of a guardrails interaction; always of kind ``server``."""

    span_kind: Literal["server"] = "server"

    operation_name: str = Field(
        default="guardrails", description="Operation name for this interaction"
    )
    service_name: str = Field(default="nemo_guardrails", description="Service name")

    user_id: Optional[str] = Field(default=None, description="User identifier")
    session_id: Optional[str] = Field(default=None, description="Session identifier")
    request_id: Optional[str] = Field(default=None, description="Request identifier")

    def to_otel_attributes(self) -> Dict[str, Any]:
        """Flatten this span into an OpenTelemetry attribute mapping.

        Returns:
            The base attributes plus the operation and service names, and
            the user/session/request identifiers that are not ``None``.
        """
        otel_attrs = self._base_attributes()
        otel_attrs.update(
            {
                GenAIAttributes.GEN_AI_OPERATION_NAME: self.operation_name,
                "service.name": self.service_name,
            }
        )

        # Identifiers are optional; unset ones are left out entirely.
        optional_identifiers = (
            ("user.id", self.user_id),
            ("session.id", self.session_id),
            ("request.id", self.request_id),
        )
        for attr_key, identifier in optional_identifiers:
            if identifier is not None:
                otel_attrs[attr_key] = identifier

        return otel_attrs
class ActionSpan(BaseSpan):
    """Internal span describing the execution of a single action."""

    span_kind: Literal["internal"] = "internal"

    # action-specific attributes
    action_name: str = Field(description="Name of the action being executed")
    action_params: Dict[str, Any] = Field(
        default_factory=dict, description="Parameters passed to the action"
    )
    has_llm_calls: bool = Field(
        default=False, description="Whether this action made LLM calls"
    )
    llm_calls_count: int = Field(
        default=0, description="Number of LLM calls made by this action"
    )

    def to_otel_attributes(self) -> Dict[str, Any]:
        """Flatten this span into an OpenTelemetry attribute mapping.

        Returns:
            The base attributes, the action name and LLM-call summary, and
            one prefixed attribute per primitive-valued action parameter.
        """
        otel_attrs = self._base_attributes()
        otel_attrs.update(
            {
                GuardrailsAttributes.ACTION_NAME: self.action_name,
                GuardrailsAttributes.ACTION_HAS_LLM_CALLS: self.has_llm_calls,
                GuardrailsAttributes.ACTION_LLM_CALLS_COUNT: self.llm_calls_count,
            }
        )

        # Only str/int/float/bool parameters are exported; other values are
        # skipped.
        primitive_types = (str, int, float, bool)
        otel_attrs.update(
            {
                f"{GuardrailsAttributes.ACTION_PARAM_PREFIX}{key}": val
                for key, val in self.action_params.items()
                if isinstance(val, primitive_types)
            }
        )

        return otel_attrs
provider_name: str = Field( + description="LLM provider name (e.g., openai, anthropic)" + ) + request_model: str = Field(description="Model requested (e.g., gpt-4)") + response_model: str = Field( + description="Model that responded (usually same as request_model)" + ) + operation_name: str = Field( + description="Operation name (e.g., chat.completions, embeddings)" + ) + + usage_input_tokens: Optional[int] = Field( + default=None, description="Number of input tokens" + ) + usage_output_tokens: Optional[int] = Field( + default=None, description="Number of output tokens" + ) + usage_total_tokens: Optional[int] = Field( + default=None, description="Total number of tokens" + ) + + # Request parameters + temperature: Optional[float] = Field( + default=None, description="Temperature parameter" + ) + max_tokens: Optional[int] = Field( + default=None, description="Maximum tokens to generate" + ) + top_p: Optional[float] = Field(default=None, description="Top-p parameter") + top_k: Optional[int] = Field(default=None, description="Top-k parameter") + frequency_penalty: Optional[float] = Field( + default=None, description="Frequency penalty" + ) + presence_penalty: Optional[float] = Field( + default=None, description="Presence penalty" + ) + stop_sequences: Optional[List[str]] = Field( + default=None, description="Stop sequences" + ) + + response_id: Optional[str] = Field(default=None, description="Response identifier") + response_finish_reasons: Optional[List[str]] = Field( + default=None, description="Finish reasons for each choice" + ) + + def to_otel_attributes(self) -> Dict[str, Any]: + """Convert to OTel attributes.""" + attributes = self._base_attributes() + + attributes[GenAIAttributes.GEN_AI_PROVIDER_NAME] = self.provider_name + attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL] = self.request_model + attributes[GenAIAttributes.GEN_AI_RESPONSE_MODEL] = self.response_model + attributes[GenAIAttributes.GEN_AI_OPERATION_NAME] = self.operation_name + + if 
def is_typed_span(span: Any) -> bool:
    """Check if an object is a typed span (V2).

    Args:
        span: Object to check

    Returns:
        True if the object is an InteractionSpan, RailSpan, ActionSpan or
        LLMSpan instance, False otherwise
    """
    # ``TypedSpan`` is a ``typing.Union`` alias; passing it to isinstance()
    # raises TypeError on Python < 3.10, so check the concrete classes.
    return isinstance(span, (InteractionSpan, RailSpan, ActionSpan, LLMSpan))
InteractionLogAdapter): diff --git a/tests/test_opentelemetry_adapter_v2.py b/tests/test_opentelemetry_adapter_v2.py new file mode 100644 index 000000000..ea190f42b --- /dev/null +++ b/tests/test_opentelemetry_adapter_v2.py @@ -0,0 +1,519 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +from unittest.mock import MagicMock, patch + +from nemoguardrails.tracing import ( + InteractionLog, + SpanEvent, + SpanFlat, + SpanOpentelemetry, +) +from nemoguardrails.tracing.adapters.opentelemetry import OpenTelemetryAdapter +from nemoguardrails.tracing.spans import InteractionSpan, LLMSpan + + +class TestOpenTelemetryAdapterV2(unittest.TestCase): + """Test OpenTelemetryAdapter handling of v2 spans.""" + + def setUp(self): + """Set up test fixtures.""" + # Mock the tracer + self.mock_tracer = MagicMock() + self.mock_tracer_provider = MagicMock() + self.mock_tracer_provider.get_tracer.return_value = self.mock_tracer + + # Patch trace.get_tracer_provider + patcher = patch("opentelemetry.trace.get_tracer_provider") + self.mock_get_tracer_provider = patcher.start() + self.mock_get_tracer_provider.return_value = self.mock_tracer_provider + self.addCleanup(patcher.stop) + + self.adapter = OpenTelemetryAdapter() + + def test_v1_span_compatibility(self): + """Test that v1 spans still work correctly.""" + mock_span = MagicMock() + 
self.mock_tracer.start_span.return_value = mock_span + + v1_span = SpanFlat( + name="test_v1", + span_id="v1_123", + start_time=0.0, + end_time=1.0, + duration=1.0, + metrics={"metric1": 42}, + ) + + interaction_log = InteractionLog( + id="test_v1_log", activated_rails=[], events=[], trace=[v1_span] + ) + + self.adapter.transform(interaction_log) + + # Verify span was created + self.mock_tracer.start_span.assert_called_once() + + # Verify metrics were set as attributes without prefix + mock_span.set_attribute.assert_any_call("metric1", 42) + + # Should not try to add events + mock_span.add_event.assert_not_called() + + def test_v2_span_attributes(self): + """Test that v2 span attributes are properly handled.""" + mock_span = MagicMock() + self.mock_tracer.start_span.return_value = mock_span + + from nemoguardrails.tracing.spans import LLMSpan + + v2_span = LLMSpan( + name="LLM: gpt-4", + span_id="v2_123", + start_time=0.0, + end_time=2.0, + duration=2.0, + provider_name="openai", + request_model="gpt-4", + response_model="gpt-4", + operation_name="chat.completions", + usage_total_tokens=150, + custom_attributes={ + "rail.decisions": ["continue", "allow"], # List attribute in custom + }, + ) + + interaction_log = InteractionLog( + id="test_v2_log", activated_rails=[], events=[], trace=[v2_span] + ) + + self.adapter.transform(interaction_log) + + # Verify OpenTelemetry attributes were set + mock_span.set_attribute.assert_any_call("gen_ai.provider.name", "openai") + mock_span.set_attribute.assert_any_call("gen_ai.request.model", "gpt-4") + mock_span.set_attribute.assert_any_call("gen_ai.usage.total_tokens", 150) + + # Verify list was passed directly + # Note: OTel Python SDK automatically converts lists to strings + mock_span.set_attribute.assert_any_call("rail.decisions", ["continue", "allow"]) + + def test_v2_span_events(self): + """Test that v2 span events are properly added.""" + mock_span = MagicMock() + self.mock_tracer.start_span.return_value = mock_span + + 
events = [ + SpanEvent( + name="gen_ai.content.prompt", + timestamp=0.5, + body={"content": "What is AI?"}, + ), + SpanEvent( + name="gen_ai.content.completion", + timestamp=1.5, + body={"content": "AI stands for Artificial Intelligence..."}, + ), + SpanEvent( + name="gen_ai.choice", + timestamp=1.6, + body={"finish_reason": "stop", "index": 0}, + ), + ] + + v2_span = LLMSpan( + name="LLM: gpt-4", + span_id="v2_events", + start_time=0.0, + end_time=2.0, + duration=2.0, + provider_name="openai", + request_model="gpt-4", + response_model="gpt-4", + operation_name="chat.completions", + events=events, + ) + + interaction_log = InteractionLog( + id="test_events", activated_rails=[], events=[], trace=[v2_span] + ) + + self.adapter.transform(interaction_log) + + # Verify events were added + self.assertEqual(mock_span.add_event.call_count, 3) + + # Check first event (prompt) + call_args = mock_span.add_event.call_args_list[0] + self.assertEqual(call_args[1]["name"], "gen_ai.content.prompt") + # In new implementation, body content is merged directly into attributes + self.assertIn("content", call_args[1]["attributes"]) + self.assertEqual(call_args[1]["attributes"]["content"], "What is AI?") + + # Check choice event has finish reason + call_args = mock_span.add_event.call_args_list[2] + self.assertEqual(call_args[1]["name"], "gen_ai.choice") + # In new implementation, body fields are merged directly into attributes + self.assertIn("finish_reason", call_args[1]["attributes"]) + + def test_v2_span_metrics(self): + """Test that v2 span token usage is properly recorded as attributes.""" + mock_span = MagicMock() + self.mock_tracer.start_span.return_value = mock_span + + # In the new implementation, token usage is in attributes, not otel_metrics + v2_span = LLMSpan( + name="completion gpt-4", # Following new naming convention + span_id="v2_metrics", + start_time=0.0, + end_time=2.0, + duration=2.0, + provider_name="openai", + request_model="gpt-4", + response_model="gpt-4", + 
operation_name="completion", + usage_input_tokens=50, + usage_output_tokens=100, + usage_total_tokens=150, + ) + + interaction_log = InteractionLog( + id="test_metrics", activated_rails=[], events=[], trace=[v2_span] + ) + + self.adapter.transform(interaction_log) + + # Verify token usage is recorded as standard attributes per OpenTelemetry GenAI conventions + mock_span.set_attribute.assert_any_call("gen_ai.usage.input_tokens", 50) + mock_span.set_attribute.assert_any_call("gen_ai.usage.output_tokens", 100) + mock_span.set_attribute.assert_any_call("gen_ai.usage.total_tokens", 150) + mock_span.set_attribute.assert_any_call("gen_ai.provider.name", "openai") + mock_span.set_attribute.assert_any_call("gen_ai.request.model", "gpt-4") + + def test_mixed_v1_v2_spans(self): + """Test handling of mixed v1 and v2 spans in the same trace.""" + mock_span = MagicMock() + self.mock_tracer.start_span.return_value = mock_span + + v1_span = SpanFlat( + name="action: check_input", + span_id="v1_span", + start_time=0.0, + end_time=0.5, + duration=0.5, + metrics={"action_total": 1}, # Will be set as action_total (no prefix) + ) + + v2_span = LLMSpan( + name="LLM: gpt-4", + span_id="v2_span", + parent_id="v1_span", + start_time=0.1, + end_time=0.4, + duration=0.3, + provider_name="openai", + request_model="gpt-4", + response_model="gpt-4", + operation_name="chat.completions", + events=[ + SpanEvent( + name="gen_ai.content.prompt", + timestamp=0.1, + body={"content": "test"}, + ) + ], + ) + + interaction_log = InteractionLog( + id="test_mixed", activated_rails=[], events=[], trace=[v1_span, v2_span] + ) + + self.adapter.transform(interaction_log) + + # Verify both spans were created + self.assertEqual(self.mock_tracer.start_span.call_count, 2) + + # Verify v2 span had events added (v1 should not) + # Only the second span should have events + event_calls = [call for call in mock_span.add_event.call_args_list] + self.assertEqual(len(event_calls), 1) # Only v2 span has events + + def 
test_event_content_passthrough(self): + """Test that event content is passed through as-is by the adapter.""" + mock_span = MagicMock() + self.mock_tracer.start_span.return_value = mock_span + + from nemoguardrails.tracing.spans import InteractionSpan + + long_content = "x" * 2000 + + v2_span = InteractionSpan( + name="test", + span_id="truncate_test", + start_time=0.0, + end_time=1.0, + duration=1.0, + events=[ + SpanEvent( + name="gen_ai.content.prompt", + timestamp=0.5, + body={"content": long_content}, + ) + ], + ) + + interaction_log = InteractionLog( + id="test_truncate", activated_rails=[], events=[], trace=[v2_span] + ) + + self.adapter.transform(interaction_log) + + # Verify content was passed through as-is + # The adapter is now a thin bridge and doesn't truncate + # Truncation should be done by the extractor if needed + call_args = mock_span.add_event.call_args_list[0] + content = call_args[1]["attributes"]["content"] + self.assertEqual(len(content), 2000) # Full content passed through + self.assertEqual(content, "x" * 2000) + + def test_unique_span_timestamps_regression_fix(self): + """Test that each span gets unique timestamps - regression test for timestamp bug. + + This test would FAIL with the old buggy logic where all end_time_ns were identical. + It PASSES with the correct logic where each span has unique timestamps. 
+ """ + created_spans = [] + + def track_span(*args, **kwargs): + span = MagicMock() + created_spans.append(span) + return span + + self.mock_tracer.start_span.side_effect = track_span + + # Create multiple V2 spans with different timings + from nemoguardrails.tracing.spans import ActionSpan, RailSpan + + spans = [ + InteractionSpan( + name="span_1", + span_id="1", + start_time=0.0, # Starts at trace beginning + end_time=1.0, # Ends after 1 second + duration=1.0, + custom_attributes={"type": "first"}, + ), + RailSpan( + name="span_2", + span_id="2", + start_time=0.5, # Starts 0.5s after trace start + end_time=2.0, # Ends after 2 seconds + duration=1.5, + rail_type="input", + rail_name="test_rail", + custom_attributes={"type": "second"}, + ), + ActionSpan( + name="span_3", + span_id="3", + start_time=1.0, # Starts 1s after trace start + end_time=1.5, # Ends after 1.5 seconds + duration=0.5, + action_name="test_action", + custom_attributes={"type": "third"}, + ), + ] + + interaction_log = InteractionLog( + id="test_timestamps", + activated_rails=[], + events=[], + trace=spans, + ) + + # Use a fixed base time for predictable results + import time + + with unittest.mock.patch("time.time_ns", return_value=1700000000_000_000_000): + self.adapter.transform(interaction_log) + + # Verify that each span was created + self.assertEqual(len(created_spans), 3) + + # Extract the end times for each span + end_times = [] + for span_mock in created_spans: + end_call = span_mock.end.call_args + end_times.append(end_call[1]["end_time"]) + + # CRITICAL TEST: All end times should be DIFFERENT + # With the bug, all end_times would be identical (base_time_ns) + unique_end_times = set(end_times) + self.assertEqual( + len(unique_end_times), + 3, + f"End times should be unique but got: {end_times}. 
" + f"This indicates the timestamp calculation bug has regressed!", + ) + + # Verify correct absolute timestamps + base_ns = 1700000000_000_000_000 + expected_end_times = [ + base_ns + 1_000_000_000, # span_1 ends at 1s + base_ns + 2_000_000_000, # span_2 ends at 2s + base_ns + 1_500_000_000, # span_3 ends at 1.5s + ] + + self.assertEqual(end_times, expected_end_times) + + def test_multiple_interactions_different_base_times(self): + """Test that multiple interactions get different base times.""" + mock_span = MagicMock() + self.mock_tracer.start_span.return_value = mock_span + + span1 = InteractionSpan( + name="span1", + span_id="1", + start_time=0.0, + end_time=1.0, + duration=1.0, + custom_attributes={"interaction": "first"}, + ) + + span2 = InteractionSpan( + name="span2", + span_id="2", + start_time=0.0, + end_time=1.0, + duration=1.0, + custom_attributes={"interaction": "second"}, + ) + + log1 = InteractionLog(id="log1", activated_rails=[], events=[], trace=[span1]) + log2 = InteractionLog(id="log2", activated_rails=[], events=[], trace=[span2]) + + # First interaction + import time + + with unittest.mock.patch("time.time_ns", return_value=1000000000_000_000_000): + self.adapter.transform(log1) + + first_start = self.mock_tracer.start_span.call_args[1]["start_time"] + + # Reset mock + self.mock_tracer.start_span.reset_mock() + + # Second interaction (100s later) + with unittest.mock.patch("time.time_ns", return_value=1000000100_000_000_000): + self.adapter.transform(log2) + + second_start = self.mock_tracer.start_span.call_args[1]["start_time"] + + # The two interactions should have different base times + self.assertNotEqual(first_start, second_start) + self.assertEqual( + second_start - first_start, 100_000_000_000 + ) # 100s difference + + def test_uses_actual_interaction_start_time_from_rails(self): + """Test that adapter uses the actual start time from activated rails, not current time.""" + import time + + from nemoguardrails.rails.llm.options import 
ActivatedRail + + one_hour_ago = time.time() - 3600 + + rail = ActivatedRail( + type="input", + name="test_rail", + started_at=one_hour_ago, + finished_at=one_hour_ago + 2.0, + duration=2.0, + ) + + span = InteractionSpan( + name="test_span", + span_id="test_123", + start_time=0.0, + end_time=1.0, + duration=1.0, + operation_name="test", + service_name="test_service", + ) + + interaction_log = InteractionLog( + id="test_actual_time", activated_rails=[rail], events=[], trace=[span] + ) + + mock_span = MagicMock() + self.mock_tracer.start_span.return_value = mock_span + + self.adapter.transform(interaction_log) + + call_args = self.mock_tracer.start_span.call_args + actual_start_time_ns = call_args[1]["start_time"] + + expected_start_time_ns = int(one_hour_ago * 1_000_000_000) + self.assertEqual( + actual_start_time_ns, + expected_start_time_ns, + "Should use the actual interaction start time from rails, not current time", + ) + + end_call = mock_span.end.call_args + actual_end_time_ns = end_call[1]["end_time"] + expected_end_time_ns = expected_start_time_ns + 1_000_000_000 + + self.assertEqual( + actual_end_time_ns, + expected_end_time_ns, + "End time should be calculated relative to the actual interaction start", + ) + + def test_fallback_when_no_rail_timestamp(self): + """Test that adapter falls back to current time when rails have no timestamp.""" + span = InteractionSpan( + name="test_span", + span_id="test_no_rails", + start_time=0.0, + end_time=1.0, + duration=1.0, + operation_name="test", + service_name="test_service", + ) + + interaction_log = InteractionLog( + id="test_no_rails", activated_rails=[], events=[], trace=[span] + ) + + mock_span = MagicMock() + self.mock_tracer.start_span.return_value = mock_span + + with patch("time.time_ns", return_value=9999999999_000_000_000): + self.adapter.transform(interaction_log) + + call_args = self.mock_tracer.start_span.call_args + actual_start_time_ns = call_args[1]["start_time"] + + self.assertEqual( + 
actual_start_time_ns, + 9999999999_000_000_000, + "Should fall back to current time when no rail timestamps available", + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_span_extractors.py b/tests/test_span_extractors.py new file mode 100644 index 000000000..709b9e61e --- /dev/null +++ b/tests/test_span_extractors.py @@ -0,0 +1,239 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import time + +import pytest + +from nemoguardrails.logging.explain import LLMCallInfo +from nemoguardrails.rails.llm.options import ActivatedRail, ExecutedAction +from nemoguardrails.tracing import ( + SpanExtractorV1, + SpanExtractorV2, + SpanFlat, + create_span_extractor, +) +from nemoguardrails.tracing.spans import LLMSpan, is_typed_span + + +class TestSpanExtractors: + """Test span extraction for flat and OpenTelemetry formats.""" + + @pytest.fixture + def test_data(self): + """Set up test data for span extraction.""" + llm_call = LLMCallInfo( + task="generate_user_intent", + prompt="What is the weather?", + completion="I cannot provide weather information.", + llm_model_name="gpt-4", + llm_provider_name="openai", + prompt_tokens=10, + completion_tokens=20, + total_tokens=30, + started_at=time.time(), + finished_at=time.time() + 1.0, + duration=1.0, + ) + + action = ExecutedAction( + action_name="generate_user_intent", + action_params={"temperature": 0.7}, + llm_calls=[llm_call], + started_at=time.time(), + finished_at=time.time() + 1.5, + duration=1.5, + ) + + rail = ActivatedRail( + type="input", + name="check_jailbreak", + decisions=["continue"], + executed_actions=[action], + stop=False, + started_at=time.time(), + finished_at=time.time() + 2.0, + duration=2.0, + ) + + return [rail] + + def test_span_extractor_flat_format(self, test_data): + """Test flat format span extractor produces flat spans.""" + extractor = SpanExtractorV1() + spans = extractor.extract_spans(test_data) + + assert len(spans) > 0 + + # All spans should be flat format + for span in spans: + assert isinstance(span, SpanFlat) + assert not hasattr(span, "attributes") + + span_names = [s.name for s in spans] + assert "interaction" in span_names + assert "rail: check_jailbreak" in span_names + assert "action: generate_user_intent" in span_names + assert "LLM: gpt-4" in span_names + + def test_span_extractor_opentelemetry_attributes(self, test_data): + """Test OpenTelemetry span extractor 
adds semantic convention attributes.""" + extractor = SpanExtractorV2() + spans = extractor.extract_spans(test_data) + + # All spans should be typed spans + for span in spans: + assert is_typed_span(span) + + # LLM spans follow OpenTelemetry convention: "{operation} {model}" + llm_span = next(s for s in spans if s.name == "generate_user_intent gpt-4") + assert isinstance(llm_span, LLMSpan) + + assert llm_span.provider_name == "openai" + assert llm_span.request_model == "gpt-4" + assert llm_span.usage_input_tokens == 10 + + attributes = llm_span.to_otel_attributes() + assert "gen_ai.provider.name" in attributes + assert attributes["gen_ai.provider.name"] == "openai" + assert attributes["gen_ai.request.model"] == "gpt-4" + assert "gen_ai.usage.input_tokens" in attributes + assert attributes["gen_ai.usage.input_tokens"] == 10 + + def test_span_extractor_opentelemetry_events(self, test_data): + """Test OpenTelemetry span extractor adds events.""" + extractor = SpanExtractorV2(enable_content_capture=True) + spans = extractor.extract_spans(test_data) + + # LLM spans follow OpenTelemetry convention + llm_span = next(s for s in spans if s.name == "generate_user_intent gpt-4") + assert len(llm_span.events) > 0 + + event_names = [e.name for e in llm_span.events] + # Currently uses deprecated content events (TODO: update to newer format) + assert "gen_ai.content.prompt" in event_names + assert "gen_ai.content.completion" in event_names + + # Check event content (only present when content capture is enabled) + user_message_event = next( + e for e in llm_span.events if e.name == "gen_ai.content.prompt" + ) + assert user_message_event.body["content"] == "What is the weather?" 
+ + def test_span_extractor_opentelemetry_metrics(self, test_data): + """Test OpenTelemetry span extractor adds metrics as attributes.""" + extractor = SpanExtractorV2() + spans = extractor.extract_spans(test_data) + + llm_span = next(s for s in spans if s.name == "generate_user_intent gpt-4") + assert isinstance(llm_span, LLMSpan) + + assert llm_span.usage_input_tokens == 10 + assert llm_span.usage_output_tokens == 20 + assert llm_span.usage_total_tokens == 30 + + attributes = llm_span.to_otel_attributes() + assert "gen_ai.usage.input_tokens" in attributes + assert "gen_ai.usage.output_tokens" in attributes + assert "gen_ai.usage.total_tokens" in attributes + + assert attributes["gen_ai.usage.input_tokens"] == 10 + assert attributes["gen_ai.usage.output_tokens"] == 20 + assert attributes["gen_ai.usage.total_tokens"] == 30 + assert attributes["gen_ai.provider.name"] == "openai" + + def test_span_extractor_conversation_events(self, test_data): + """Test OpenTelemetry span extractor extracts conversation events from internal events.""" + internal_events = [ + {"type": "UtteranceUserActionFinished", "final_transcript": "Hello bot"}, + {"type": "StartUtteranceBotAction", "script": "Hello! 
How can I help?"}, + {"type": "SystemMessage", "content": "You are a helpful assistant"}, + ] + + extractor = SpanExtractorV2(events=internal_events) + spans = extractor.extract_spans(test_data) + + interaction_span = next(s for s in spans if s.name == "guardrails.request") + assert len(interaction_span.events) > 0 + + event_names = [e.name for e in interaction_span.events] + assert "guardrails.utterance.user.finished" in event_names + assert "guardrails.utterance.bot.started" in event_names + + user_event = next( + e + for e in interaction_span.events + if e.name == "guardrails.utterance.user.finished" + ) + assert "type" in user_event.body + # Content not included by default (privacy) + assert "final_transcript" not in user_event.body + + +class TestSpanFormatConfiguration: + """Test span format configuration and factory.""" + + def test_create_span_extractor_flat(self): + """Test creating flat format span extractor.""" + extractor = create_span_extractor(span_format="flat") + assert isinstance(extractor, SpanExtractorV1) + + def test_create_span_extractor_opentelemetry(self): + """Test creating OpenTelemetry format span extractor.""" + extractor = create_span_extractor(span_format="opentelemetry") + assert isinstance(extractor, SpanExtractorV2) + + def test_create_invalid_format_raises_error(self): + """Test invalid span format raises ValueError.""" + with pytest.raises(ValueError) as exc_info: + create_span_extractor(span_format="invalid") + assert "Invalid span format" in str(exc_info.value) + + def test_opentelemetry_extractor_with_events(self): + """Test OpenTelemetry extractor can be created with events.""" + events = [{"type": "UserMessage", "text": "test"}] + extractor = create_span_extractor( + span_format="opentelemetry", events=events, enable_content_capture=False + ) + + assert isinstance(extractor, SpanExtractorV2) + assert extractor.internal_events == events + + def test_flat_extractor_ignores_extra_params(self): + """Test flat extractor ignores 
OpenTelemetry-specific parameters.""" + # Flat extractor should ignore events and enable_content_capture + extractor = create_span_extractor( + span_format="flat", events=[{"type": "test"}], enable_content_capture=True + ) + + assert isinstance(extractor, SpanExtractorV1) + # V1 extractor doesn't have these attributes + assert not hasattr(extractor, "internal_events") + assert not hasattr(extractor, "enable_content_capture") + + @pytest.mark.parametrize( + "format_str,expected_class", + [ + ("flat", SpanExtractorV1), + ("FLAT", SpanExtractorV1), + ("opentelemetry", SpanExtractorV2), + ("OPENTELEMETRY", SpanExtractorV2), + ("OpenTelemetry", SpanExtractorV2), + ], + ) + def test_case_insensitive_format(self, format_str, expected_class): + """Test that span format is case-insensitive.""" + extractor = create_span_extractor(span_format=format_str) + assert isinstance(extractor, expected_class) diff --git a/tests/test_span_format_enum.py b/tests/test_span_format_enum.py new file mode 100644 index 000000000..32b19b57b --- /dev/null +++ b/tests/test_span_format_enum.py @@ -0,0 +1,209 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import json +from typing import Any + +import pytest + +from nemoguardrails.tracing.span_format import ( + SpanFormat, + SpanFormatType, + validate_span_format, +) + + +class TestSpanFormat: + """Test cases for SpanFormat enum.""" + + def test_enum_values(self): + """Test that enum has expected values.""" + assert SpanFormat.FLAT.value == "flat" + assert SpanFormat.OPENTELEMETRY.value == "opentelemetry" + + def test_string_inheritance(self): + """Test that SpanFormat inherits from str.""" + assert isinstance(SpanFormat.FLAT, str) + assert isinstance(SpanFormat.OPENTELEMETRY, str) + + def test_string_comparison(self): + """Test direct string comparison works.""" + assert SpanFormat.FLAT == "flat" + assert SpanFormat.OPENTELEMETRY == "opentelemetry" + assert SpanFormat.FLAT != "opentelemetry" + + def test_json_serialization(self): + """Test that enum values can be JSON serialized.""" + data = {"format": SpanFormat.FLAT} + json_str = json.dumps(data) + assert '"format": "flat"' in json_str + + parsed = json.loads(json_str) + assert parsed["format"] == "flat" + + def test_str_method(self): + """Test __str__ method returns value.""" + assert str(SpanFormat.FLAT) == "flat" + assert str(SpanFormat.OPENTELEMETRY) == "opentelemetry" + + def test_from_string_valid_values(self): + """Test from_string with valid values.""" + assert SpanFormat.from_string("flat") == SpanFormat.FLAT + assert SpanFormat.from_string("opentelemetry") == SpanFormat.OPENTELEMETRY + + assert SpanFormat.from_string("FLAT") == SpanFormat.FLAT + assert SpanFormat.from_string("OpenTelemetry") == SpanFormat.OPENTELEMETRY + assert SpanFormat.from_string("OPENTELEMETRY") == SpanFormat.OPENTELEMETRY + + def test_from_string_invalid_value(self): + """Test from_string with invalid value raises ValueError.""" + with pytest.raises(ValueError) as exc_info: + SpanFormat.from_string("invalid") + + error_msg = str(exc_info.value) + assert "Invalid span format: 'invalid'" in error_msg + assert "Valid formats are: 
flat, opentelemetry" in error_msg + + def test_from_string_empty_value(self): + """Test from_string with empty string raises ValueError.""" + with pytest.raises(ValueError): + SpanFormat.from_string("") + + def test_from_string_none_value(self): + """Test from_string with None raises appropriate error.""" + with pytest.raises(AttributeError): + SpanFormat.from_string(None) + + +class TestValidateSpanFormat: + """Test cases for validate_span_format function.""" + + def test_validate_span_format_enum(self): + """Test validation with SpanFormat enum.""" + result = validate_span_format(SpanFormat.FLAT) + assert result == SpanFormat.FLAT + assert isinstance(result, SpanFormat) + + result = validate_span_format(SpanFormat.OPENTELEMETRY) + assert result == SpanFormat.OPENTELEMETRY + assert isinstance(result, SpanFormat) + + def test_validate_span_format_string(self): + """Test validation with string values.""" + result = validate_span_format("flat") + assert result == SpanFormat.FLAT + assert isinstance(result, SpanFormat) + + result = validate_span_format("opentelemetry") + assert result == SpanFormat.OPENTELEMETRY + assert isinstance(result, SpanFormat) + + result = validate_span_format("FLAT") + assert result == SpanFormat.FLAT + + def test_validate_span_format_invalid_string(self): + """Test validation with invalid string raises ValueError.""" + with pytest.raises(ValueError) as exc_info: + validate_span_format("invalid") + + error_msg = str(exc_info.value) + assert "Invalid span format: 'invalid'" in error_msg + + def test_validate_span_format_invalid_type(self): + """Test validation with invalid type raises TypeError.""" + with pytest.raises(TypeError) as exc_info: + validate_span_format(123) + + error_msg = str(exc_info.value) + assert "Span format must be a string or SpanFormat enum" in error_msg + assert "got " in error_msg + + def test_validate_span_format_none(self): + """Test validation with None raises TypeError.""" + with pytest.raises(TypeError): + 
validate_span_format(None) + + def test_validate_span_format_list(self): + """Test validation with list raises TypeError.""" + with pytest.raises(TypeError): + validate_span_format(["flat"]) + + def test_validate_span_format_dict(self): + """Test validation with dict raises TypeError.""" + with pytest.raises(TypeError): + validate_span_format({"format": "flat"}) + + +class TestSpanFormatType: + """Test cases for SpanFormatType type alias.""" + + def test_type_alias_accepts_enum(self): + """Test that type alias accepts SpanFormat enum.""" + + def test_function(format_type: SpanFormatType) -> SpanFormat: + return validate_span_format(format_type) + + result = test_function(SpanFormat.FLAT) + assert result == SpanFormat.FLAT + + def test_type_alias_accepts_string(self): + """Test that type alias accepts string values.""" + + def test_function(format_type: SpanFormatType) -> SpanFormat: + return validate_span_format(format_type) + + result = test_function("flat") + assert result == SpanFormat.FLAT + + result = test_function("opentelemetry") + assert result == SpanFormat.OPENTELEMETRY + + +class TestSpanFormatIntegration: + """Integration tests for span format functionality.""" + + def test_config_usage_pattern(self): + """Test typical configuration usage pattern.""" + config_value = "opentelemetry" + format_enum = validate_span_format(config_value) + + if format_enum == SpanFormat.OPENTELEMETRY: + assert True # Expected path + else: + pytest.fail("Unexpected format") + + def test_function_parameter_pattern(self): + """Test typical function parameter usage pattern.""" + + def process_spans(span_format: SpanFormatType = SpanFormat.FLAT): + validated_format = validate_span_format(span_format) + return validated_format + + result = process_spans() + assert result == SpanFormat.FLAT + + result = process_spans("opentelemetry") + assert result == SpanFormat.OPENTELEMETRY + + result = process_spans(SpanFormat.OPENTELEMETRY) + assert result == SpanFormat.OPENTELEMETRY + + def 
test_all_enum_values_have_tests(self): + """Ensure all enum values are tested.""" + tested_values = {"flat", "opentelemetry"} + actual_values = {format_enum.value for format_enum in SpanFormat} + assert ( + tested_values == actual_values + ), f"Missing tests for: {actual_values - tested_values}" diff --git a/tests/test_span_models_and_extractors.py b/tests/test_span_models_and_extractors.py new file mode 100644 index 000000000..3fe0f35d0 --- /dev/null +++ b/tests/test_span_models_and_extractors.py @@ -0,0 +1,271 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import time + +import pytest + +from nemoguardrails.logging.explain import LLMCallInfo +from nemoguardrails.rails.llm.options import ActivatedRail, ExecutedAction +from nemoguardrails.tracing import ( + SpanEvent, + SpanExtractorV1, + SpanExtractorV2, + SpanFlat, + SpanOpentelemetry, + create_span_extractor, +) +from nemoguardrails.tracing.spans import LLMSpan, is_typed_span + + +class TestSpanModels: + def test_span_v1_creation(self): + span = SpanFlat( + span_id="test-123", + name="test span", + start_time=0.0, + end_time=1.0, + duration=1.0, + metrics={"test_metric": 42}, + ) + + assert span.span_id == "test-123" + assert span.name == "test span" + assert span.duration == 1.0 + assert span.metrics["test_metric"] == 42 + + assert not hasattr(span, "attributes") + assert not hasattr(span, "events") + assert not hasattr(span, "otel_metrics") + + def test_span_v2_creation(self): + """Test creating a v2 span - typed spans with explicit fields.""" + from nemoguardrails.tracing.spans import LLMSpan + + event = SpanEvent( + name="gen_ai.content.prompt", timestamp=0.5, body={"content": "test prompt"} + ) + + # V2 spans are typed with explicit fields + span = LLMSpan( + span_id="test-456", + name="generate_user_intent gpt-4", + start_time=0.0, + end_time=2.0, + duration=2.0, + provider_name="openai", + request_model="gpt-4", + response_model="gpt-4", + operation_name="chat.completions", + usage_input_tokens=10, + usage_output_tokens=20, + usage_total_tokens=30, + events=[event], + ) + + assert span.span_id == "test-456" + assert span.provider_name == "openai" + assert span.request_model == "gpt-4" + assert span.usage_input_tokens == 10 + assert len(span.events) == 1 + assert span.events[0].name == "gen_ai.content.prompt" + + # Check that to_otel_attributes works + attributes = span.to_otel_attributes() + assert attributes["gen_ai.provider.name"] == "openai" + assert attributes["gen_ai.request.model"] == "gpt-4" + + assert not isinstance(span, SpanFlat) + assert 
isinstance(span, SpanOpentelemetry) + + # Note: V1 and V2 spans are now fundamentally different types + # V1 is a simple span model, V2 is typed spans with explicit fields + # No conversion between them is needed or supported + + +class TestSpanExtractors: + @pytest.fixture + def test_data(self): + llm_call = LLMCallInfo( + task="generate_user_intent", + prompt="What is the weather?", + completion="I cannot provide weather information.", + llm_model_name="gpt-4", + llm_provider_name="openai", + prompt_tokens=10, + completion_tokens=20, + total_tokens=30, + started_at=time.time(), + finished_at=time.time() + 1.0, + duration=1.0, + ) + + action = ExecutedAction( + action_name="generate_user_intent", + action_params={"temperature": 0.7}, + llm_calls=[llm_call], + started_at=time.time(), + finished_at=time.time() + 1.5, + duration=1.5, + ) + + rail = ActivatedRail( + type="input", + name="check_jailbreak", + decisions=["continue"], + executed_actions=[action], + stop=False, + started_at=time.time(), + finished_at=time.time() + 2.0, + duration=2.0, + ) + + activated_rails = [rail] + return { + "activated_rails": activated_rails, + "llm_call": llm_call, + "action": action, + "rail": rail, + } + + def test_span_extractor_v1(self, test_data): + extractor = SpanExtractorV1() + spans = extractor.extract_spans(test_data["activated_rails"]) + + assert len(spans) > 0 + + for span in spans: + assert isinstance(span, SpanFlat) + assert not hasattr(span, "attributes") + + span_names = [s.name for s in spans] + assert "interaction" in span_names + assert "rail: check_jailbreak" in span_names + assert "action: generate_user_intent" in span_names + assert "LLM: gpt-4" in span_names + + def test_span_extractor_v2_attributes(self, test_data): + extractor = SpanExtractorV2() + spans = extractor.extract_spans(test_data["activated_rails"]) + + for span in spans: + # Now we expect typed spans + assert is_typed_span(span) + + # In V2, LLM spans follow OpenTelemetry convention: "{operation} 
{model}" + llm_span = next(s for s in spans if s.name == "generate_user_intent gpt-4") + assert isinstance(llm_span, LLMSpan) + + # For typed spans, check the fields directly + assert llm_span.provider_name == "openai" + assert llm_span.request_model == "gpt-4" + assert llm_span.usage_input_tokens == 10 + + # Also verify attributes conversion works + attributes = llm_span.to_otel_attributes() + assert "gen_ai.provider.name" in attributes + assert attributes["gen_ai.provider.name"] == "openai" + assert attributes["gen_ai.request.model"] == "gpt-4" + assert "gen_ai.usage.input_tokens" in attributes + assert attributes["gen_ai.usage.input_tokens"] == 10 + + def test_span_extractor_v2_events(self, test_data): + extractor = SpanExtractorV2(enable_content_capture=True) + spans = extractor.extract_spans(test_data["activated_rails"]) + + # In V2, LLM spans follow OpenTelemetry convention: "{operation} {model}" + llm_span = next(s for s in spans if s.name == "generate_user_intent gpt-4") + assert len(llm_span.events) > 0 + + event_names = [e.name for e in llm_span.events] + # V2 currently uses deprecated content events for simplicity (TODO: update to newer format) + assert "gen_ai.content.prompt" in event_names + assert "gen_ai.content.completion" in event_names + + # Check user message event content (only present when content capture is enabled) + user_message_event = next( + e for e in llm_span.events if e.name == "gen_ai.content.prompt" + ) + assert user_message_event.body["content"] == "What is the weather?" 
+
+    def test_span_extractor_v2_metrics(self, test_data):
+        """Check token-usage fields on the typed LLM span and their OTel attribute mapping."""
+        extractor = SpanExtractorV2()
+        spans = extractor.extract_spans(test_data["activated_rails"])
+
+        # In V2, LLM spans follow OpenTelemetry convention: "{operation} {model}"
+        llm_span = next(s for s in spans if s.name == "generate_user_intent gpt-4")
+        assert isinstance(llm_span, LLMSpan)
+
+        # Check typed fields
+        assert llm_span.usage_input_tokens == 10
+        assert llm_span.usage_output_tokens == 20
+        assert llm_span.usage_total_tokens == 30
+        assert llm_span.provider_name == "openai"
+
+        # Verify attributes conversion
+        attributes = llm_span.to_otel_attributes()
+        assert attributes["gen_ai.usage.total_tokens"] == 30
+        assert attributes["gen_ai.provider.name"] == "openai"
+
+    def test_span_extractor_v2_conversation_events(self, test_data):
+        """Check that internal events are attached to the request span without content by default.
+
+        The extractor is built without ``enable_content_capture``, so the user
+        utterance event is expected to keep ``type`` but not ``final_transcript``.
+        """
+        internal_events = [
+            {"type": "UtteranceUserActionFinished", "final_transcript": "Hello bot"},
+            {"type": "StartUtteranceBotAction", "script": "Hello! How can I help?"},
+            # NOTE(review): no assertion below covers the SystemMessage event
+            {"type": "SystemMessage", "content": "You are a helpful assistant"},
+        ]
+
+        # Test with content excluded by default (privacy compliant)
+        extractor = SpanExtractorV2(events=internal_events)
+        spans = extractor.extract_spans(test_data["activated_rails"])
+
+        interaction_span = next(s for s in spans if s.name == "guardrails.request")
+        assert len(interaction_span.events) > 0
+
+        event_names = [e.name for e in interaction_span.events]
+        # These are guardrails internal events, not OTel GenAI events
+        assert "guardrails.utterance.user.finished" in event_names
+        assert "guardrails.utterance.bot.started" in event_names
+
+        user_event = next(
+            e
+            for e in interaction_span.events
+            if e.name == "guardrails.utterance.user.finished"
+        )
+        # By default, content is NOT included (privacy compliant)
+        assert "type" in user_event.body
+        assert "final_transcript" not in user_event.body
+
+
+class TestSpanVersionConfiguration:
+    """Tests for how ``create_span_extractor`` maps a span format name to an extractor class."""
+
+    def test_create_span_extractor_flat(self):
+        """The "flat" format yields a SpanExtractorV1."""
+        extractor = create_span_extractor(span_format="flat")
+        assert isinstance(extractor, SpanExtractorV1)
+
+    def test_create_span_extractor_opentelemetry(self):
+        """The "opentelemetry" format yields a SpanExtractorV2."""
+        extractor = create_span_extractor(span_format="opentelemetry")
+        assert isinstance(extractor, SpanExtractorV2)
+
+    def test_create_invalid_format(self):
+        """An unknown format name raises ValueError mentioning "Invalid span format"."""
+        with pytest.raises(ValueError, match="Invalid span format"):
+            create_span_extractor(span_format="invalid")
+
+    def test_opentelemetry_extractor_with_events(self):
+        """Events passed to the factory are exposed on the extractor as ``internal_events``."""
+        events = [{"type": "UserMessage", "text": "test"}]
+        extractor = create_span_extractor(
+            span_format="opentelemetry", events=events, enable_content_capture=False
+        )
+
+        assert isinstance(extractor, SpanExtractorV2)
+        assert extractor.internal_events == events
diff --git a/tests/test_span_v2_integration.py b/tests/test_span_v2_integration.py
new file mode 100644
index 000000000..e76e2003c
--- /dev/null
+++ b/tests/test_span_v2_integration.py
@@ -0,0 +1,161 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pytest
+
+from nemoguardrails import LLMRails, RailsConfig
+from nemoguardrails.rails.llm.options import GenerationOptions
+from nemoguardrails.tracing import SpanOpentelemetry, create_span_extractor
+from nemoguardrails.tracing.spans import LLMSpan, is_typed_span
+from tests.utils import FakeLLM
+
+
+@pytest.fixture
+def v2_config():
+    """Rails config with tracing enabled and ``span_format: opentelemetry``."""
+    return RailsConfig.from_content(
+        yaml_content="""
+models:
+  - type: main
+    engine: openai
+    model: gpt-4
+
+tracing:
+  enabled: true
+  span_format: opentelemetry
+  adapters: []
+"""
+    )
+
+
+@pytest.fixture
+def v1_config():
+    """Rails config with tracing enabled and ``span_format: flat``."""
+    return RailsConfig.from_content(
+        yaml_content="""
+models:
+  - type: main
+    engine: openai
+    model: gpt-4
+
+tracing:
+  enabled: true
+  span_format: flat
+  adapters: []
+"""
+    )
+
+
+@pytest.fixture
+def default_config():
+    """Rails config with tracing enabled and no explicit ``span_format``."""
+    return RailsConfig.from_content(
+        yaml_content="""
+models:
+  - type: main
+    engine: openai
+    model: gpt-4
+
+tracing:
+  enabled: true
+  adapters: []
+"""
+    )
+
+
+def test_span_v2_configuration(v2_config):
+    """The opentelemetry format is kept on the config and maps to SpanExtractorV2."""
+    assert v2_config.tracing.span_format == "opentelemetry"
+
+    llm = FakeLLM(responses=["Hello! I'm here to help."])
+    # Only checks that LLMRails can be constructed with this config.
+    _rails = LLMRails(config=v2_config, llm=llm)
+
+    extractor = create_span_extractor(span_format="opentelemetry")
+    assert extractor.__class__.__name__ == "SpanExtractorV2"
+
+
+@pytest.mark.asyncio
+async def test_v2_spans_generated_with_events(v2_config):
+    """Run one generation and check that the extracted trace consists of typed spans.
+
+    The trace must contain a "guardrails.request" span and at least one LLMSpan
+    exposing provider/model attributes and a non-empty event list.
+    """
+    llm = FakeLLM(responses=["  express greeting", "Hello! How can I help you today?"])
+
+    rails = LLMRails(config=v2_config, llm=llm)
+
+    options = GenerationOptions(
+        log={"activated_rails": True, "internal_events": True, "llm_calls": True}
+    )
+
+    response = await rails.generate_async(
+        messages=[{"role": "user", "content": "Hello!"}], options=options
+    )
+
+    assert response.response is not None
+    assert response.log is not None
+
+    from nemoguardrails.tracing.interaction_types import (
+        InteractionOutput,
+        extract_interaction_log,
+    )
+
+    interaction_output = InteractionOutput(
+        id="test", input="Hello!", output=response.response
+    )
+
+    interaction_log = extract_interaction_log(interaction_output, response.log)
+
+    assert len(interaction_log.trace) > 0
+
+    for span in interaction_log.trace:
+        assert is_typed_span(span)
+
+    interaction_span = next(
+        (s for s in interaction_log.trace if s.name == "guardrails.request"), None
+    )
+    assert interaction_span is not None
+
+    llm_spans = [s for s in interaction_log.trace if isinstance(s, LLMSpan)]
+    assert len(llm_spans) > 0
+
+    for llm_span in llm_spans:
+        assert hasattr(llm_span, "provider_name")
+        assert hasattr(llm_span, "request_model")
+
+        attrs = llm_span.to_otel_attributes()
+        assert "gen_ai.provider.name" in attrs
+        assert "gen_ai.request.model" in attrs
+
+        assert hasattr(llm_span, "events")
+        assert len(llm_span.events) > 0
+
+
+def test_v1_backward_compatibility(v1_config):
+    """The flat format is kept on the config and maps to SpanExtractorV1."""
+    assert v1_config.tracing.span_format == "flat"
+
+    llm = FakeLLM(responses=["Hello!"])
+    # Only checks that LLMRails can be constructed with this config.
+    _rails = LLMRails(config=v1_config, llm=llm)
+
+    extractor = create_span_extractor(span_format="flat")
+    assert extractor.__class__.__name__ == "SpanExtractorV1"
+
+
+def test_default_span_format(default_config):
+    """A config that omits ``span_format`` reports "opentelemetry"."""
+    assert default_config.tracing.span_format == "opentelemetry"
+
+
+def test_span_format_configuration_direct():
+    """Each format name maps to its extractor class; an unknown name raises ValueError."""
+    extractor_flat = create_span_extractor(span_format="flat")
+    assert extractor_flat.__class__.__name__ == "SpanExtractorV1"
+
+    extractor_otel = create_span_extractor(span_format="opentelemetry")
+    assert extractor_otel.__class__.__name__ == "SpanExtractorV2"
+
+    # `match` checks the message as part of the raises-context itself, so the
+    # check cannot be skipped by ending up after the raising call in the body.
+    with pytest.raises(ValueError, match="Invalid span format"):
+        create_span_extractor(span_format="invalid")
diff --git a/tests/test_span_v2_otel_semantics.py b/tests/test_span_v2_otel_semantics.py
new file mode 100644
index 000000000..41a1fb781
--- /dev/null
+++ b/tests/test_span_v2_otel_semantics.py
@@ -0,0 +1,604 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Tests for SpanOpentelemetry with complete OpenTelemetry semantic convention attributes."""
+
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from nemoguardrails.rails.llm.options import ActivatedRail, ExecutedAction, LLMCallInfo
+from nemoguardrails.tracing.constants import (
+    CommonAttributes,
+    EventNames,
+    GenAIAttributes,
+    GuardrailsAttributes,
+    OperationNames,
+    SpanKind,
+    SpanNames,
+)
+from nemoguardrails.tracing.span_extractors import SpanExtractorV2
+from nemoguardrails.tracing.spans import ActionSpan, InteractionSpan, LLMSpan, RailSpan
+
+
+class TestSpanOpentelemetryOTelAttributes:
+    """Test that SpanV2 contains complete OTel semantic convention attributes."""
+
+    def test_interaction_span_has_complete_attributes(self):
+        """Test that interaction span has all required OTel attributes."""
+        rail = ActivatedRail(
+            type="input",
+            name="check_jailbreak",
+            started_at=1.0,
+            finished_at=2.0,
+            duration=1.0,
+            executed_actions=[],
+        )
+
+        extractor = SpanExtractorV2()
+        spans = extractor.extract_spans([rail])
+
+        # The interaction span is the only span without a parent.
+        interaction_span = next(s for s in spans if s.parent_id is None)
+        assert isinstance(interaction_span, InteractionSpan)
+
+        attrs = interaction_span.to_otel_attributes()
+        assert attrs[CommonAttributes.SPAN_KIND] == SpanKind.SERVER
+        assert attrs[GenAIAttributes.GEN_AI_OPERATION_NAME] == OperationNames.GUARDRAILS
+        assert "service.name" in attrs
+        assert interaction_span.name == SpanNames.GUARDRAILS_REQUEST
+
+        assert GenAIAttributes.GEN_AI_PROVIDER_NAME not in attrs
+        assert GenAIAttributes.GEN_AI_SYSTEM not in attrs
+
+    def test_rail_span_has_complete_attributes(self):
+        """Test that rail spans have all required attributes."""
+        rail = ActivatedRail(
+            type="input",
+            name="check_jailbreak",
+            started_at=1.0,
+            finished_at=2.0,
+            duration=1.0,
+            stop=True,
+            decisions=["blocked"],
+            executed_actions=[],
+        )
+
+        extractor = SpanExtractorV2()
+        spans = extractor.extract_spans([rail])
+
+        rail_span = next(s for s in spans if s.name == SpanNames.GUARDRAILS_RAIL)
+        assert isinstance(rail_span, RailSpan)
+
+        attrs = rail_span.to_otel_attributes()
+        assert attrs[CommonAttributes.SPAN_KIND] == SpanKind.INTERNAL
+        assert attrs[GuardrailsAttributes.RAIL_TYPE] == "input"
+        assert attrs[GuardrailsAttributes.RAIL_NAME] == "check_jailbreak"
+        assert attrs[GuardrailsAttributes.RAIL_STOP] is True
+        assert attrs[GuardrailsAttributes.RAIL_DECISIONS] == ["blocked"]
+
+    def test_llm_span_has_complete_attributes(self):
+        """Test that LLM spans have all required OTel GenAI attributes."""
+        llm_call = LLMCallInfo(
+            task="generate",
+            llm_model_name="gpt-4",
+            llm_provider_name="openai",
+            prompt="Hello, world!",
+            completion="Hi there!",
+            prompt_tokens=10,
+            completion_tokens=5,
+            total_tokens=15,
+            started_at=1.5,
+            finished_at=1.8,
+            duration=0.3,
+            raw_response={
+                "id": "chatcmpl-123",
+                "choices": [{"finish_reason": "stop"}],
+                "temperature": 0.7,
+                "max_tokens": 100,
+                "top_p": 0.9,
+            },
+        )
+
+        action = ExecutedAction(
+            action_name="generate_user_intent",
+            started_at=1.0,
+            finished_at=2.0,
+            duration=1.0,
+            llm_calls=[llm_call],
+        )
+
+        rail = ActivatedRail(
+            type="dialog",
+            name="generate_next_step",
+            started_at=1.0,
+            finished_at=2.0,
+            duration=1.0,
+            executed_actions=[action],
+        )
+
+        extractor = SpanExtractorV2()
+        spans = extractor.extract_spans([rail])
+
+        llm_span = next(s for s in spans if "gpt-4" in s.name)
+        assert isinstance(llm_span, LLMSpan)
+
+        attrs = llm_span.to_otel_attributes()
+        assert attrs[CommonAttributes.SPAN_KIND] == SpanKind.CLIENT
+        assert attrs[GenAIAttributes.GEN_AI_PROVIDER_NAME] == "openai"
+        assert attrs[GenAIAttributes.GEN_AI_REQUEST_MODEL] == "gpt-4"
+        assert attrs[GenAIAttributes.GEN_AI_RESPONSE_MODEL] == "gpt-4"
+        assert attrs[GenAIAttributes.GEN_AI_OPERATION_NAME] == "generate"
+        assert attrs[GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS] == 10
+        assert attrs[GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS] == 5
+        assert attrs[GenAIAttributes.GEN_AI_USAGE_TOTAL_TOKENS] == 15
+        assert attrs[GenAIAttributes.GEN_AI_RESPONSE_ID] == "chatcmpl-123"
+        assert attrs[GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS] == ["stop"]
+        assert attrs[GenAIAttributes.GEN_AI_REQUEST_TEMPERATURE] == 0.7
+        assert attrs[GenAIAttributes.GEN_AI_REQUEST_MAX_TOKENS] == 100
+        assert attrs[GenAIAttributes.GEN_AI_REQUEST_TOP_P] == 0.9
+
+        assert GenAIAttributes.GEN_AI_SYSTEM not in attrs
+
+    def test_llm_span_events_are_complete(self):
+        """Test that LLM span events follow OTel GenAI conventions."""
+        llm_call = LLMCallInfo(
+            task="chat",
+            llm_model_name="claude-3",
+            prompt="What is the weather?",
+            completion="I cannot access real-time weather data.",
+            started_at=1.5,
+            finished_at=1.8,
+            duration=0.3,
+            raw_response={"stop_reason": "end_turn"},
+        )
+
+        action = ExecutedAction(
+            action_name="llm_generate",
+            started_at=1.0,
+            finished_at=2.0,
+            duration=1.0,
+            llm_calls=[llm_call],
+        )
+
+        rail = ActivatedRail(
+            type="dialog",
+            name="chat",
+            started_at=1.0,
+            finished_at=2.0,
+            duration=1.0,
+            executed_actions=[action],
+        )
+
+        extractor = SpanExtractorV2(enable_content_capture=True)
+        spans = extractor.extract_spans([rail])
+
+        llm_span = next(s for s in spans if "claude" in s.name)
+        assert isinstance(llm_span, LLMSpan)
+
+        assert len(llm_span.events) >= 2  # at least user and assistant messages
+
+        user_event = next(
+            e for e in llm_span.events if e.name == EventNames.GEN_AI_CONTENT_PROMPT
+        )
+        assert user_event.body["content"] == "What is the weather?"
+
+        assistant_event = next(
+            e for e in llm_span.events if e.name == EventNames.GEN_AI_CONTENT_COMPLETION
+        )
+        assert (
+            assistant_event.body["content"] == "I cannot access real-time weather data."
+        )
+
+        # The finish event is optional here: its shape is checked only if emitted.
+        finish_events = [e for e in llm_span.events if e.name == "gen_ai.choice.finish"]
+        if finish_events:
+            finish_event = finish_events[0]
+            assert "finish_reason" in finish_event.body
+            assert "index" in finish_event.body
+
+    def test_action_span_with_error_attributes(self):
+        """Test that action spans include error information when present."""
+        # TODO: Figure out how errors are properly attached to actions
+        action = ExecutedAction(
+            action_name="failed_action",
+            started_at=1.0,
+            finished_at=2.0,
+            duration=1.0,
+            llm_calls=[],
+        )
+        # skip setting error for now since ExecutedAction doesn't have that field
+        # action.error = ValueError("Something went wrong")
+
+        rail = ActivatedRail(
+            type="input",
+            name="check_input",
+            started_at=1.0,
+            finished_at=2.0,
+            duration=1.0,
+            executed_actions=[action],
+        )
+
+        extractor = SpanExtractorV2()
+        spans = extractor.extract_spans([rail])
+
+        action_span = next(s for s in spans if s.name == SpanNames.GUARDRAILS_ACTION)
+        assert isinstance(action_span, ActionSpan)
+
+        attrs = action_span.to_otel_attributes()
+        # since we didn't set an error, these shouldn't be present
+        assert "error" not in attrs or attrs["error"] is None
+        assert "error.type" not in attrs
+        assert "error.message" not in attrs
+
+    def test_span_names_are_low_cardinality(self):
+        """Test that span names follow low-cardinality convention."""
+        rails = [
+            ActivatedRail(
+                type="input",
+                name=f"rail_{i}",
+                started_at=float(i),
+                finished_at=float(i + 1),
+                duration=1.0,
+                executed_actions=[
+                    ExecutedAction(
+                        action_name=f"action_{i}",
+                        started_at=float(i),
+                        finished_at=float(i + 1),
+                        duration=1.0,
+                        llm_calls=[
+                            LLMCallInfo(
+                                task=f"task_{i}",
+                                llm_model_name=f"model_{i}",
+                                started_at=float(i),
+                                finished_at=float(i + 1),
+                                duration=1.0,
+                            )
+                        ],
+                    )
+                ],
+            )
+            for i in range(3)
+        ]
+
+        extractor = SpanExtractorV2()
+        all_spans = []
+        for rail in rails:
+            spans = extractor.extract_spans([rail])
+            all_spans.extend(spans)
+
+        expected_patterns = {
+            SpanNames.GUARDRAILS_REQUEST,
+            SpanNames.GUARDRAILS_RAIL,
+            SpanNames.GUARDRAILS_ACTION,
+        }
+
+        # LLM spans embed the model name, so they are excluded from the
+        # fixed-name check; every other span must use one of the constants.
+        for span in all_spans:
+            if not any(f"model_{i}" in span.name for i in range(3)):
+                assert span.name in expected_patterns
+
+        # The per-rail detail lives in attributes, not in the span name.
+        rail_spans = [s for s in all_spans if s.name == SpanNames.GUARDRAILS_RAIL]
+        rail_names = {
+            s.to_otel_attributes()[GuardrailsAttributes.RAIL_NAME] for s in rail_spans
+        }
+        assert len(rail_names) == 3
+
+    def test_no_semantic_logic_in_adapter(self):
+        """Verify adapter is just an API bridge by checking it doesn't modify attributes."""
+        from opentelemetry.sdk.trace import TracerProvider
+        from opentelemetry.sdk.trace.export import SimpleSpanProcessor
+
+        from nemoguardrails.tracing import InteractionLog
+        from nemoguardrails.tracing.adapters.opentelemetry import OpenTelemetryAdapter
+
+        # create a mock exporter to capture spans
+        class MockExporter:
+            def __init__(self):
+                self.spans = []
+
+            def export(self, spans):
+                self.spans.extend(spans)
+                return 0
+
+            def shutdown(self):
+                pass
+
+        # setup OTel
+        exporter = MockExporter()
+        provider = TracerProvider()
+        provider.add_span_processor(SimpleSpanProcessor(exporter))
+
+        # Patch the provider lookup instead of calling trace.set_tracer_provider():
+        # the global provider can only be set once per process, so setting it here
+        # would make this test order-dependent and leak the provider into every
+        # test that runs afterwards. Same approach as
+        # TestOpenTelemetryAdapterAsTheBridge below.
+        with patch("opentelemetry.trace.get_tracer_provider", return_value=provider):
+            adapter = OpenTelemetryAdapter()
+
+        # create a simple rail
+        rail = ActivatedRail(
+            type="input",
+            name="test_rail",
+            started_at=1.0,
+            finished_at=2.0,
+            duration=1.0,
+            executed_actions=[],
+        )
+
+        # extract spans with V2 extractor
+        extractor = SpanExtractorV2()
+        spans = extractor.extract_spans([rail])
+
+        # create interaction log
+        interaction_log = InteractionLog(
+            id="test-trace-123",
+            activated_rails=[rail],
+            trace=spans,
+        )
+
+        # transform through adapter
+        adapter.transform(interaction_log)
+
+        assert len(exporter.spans) > 0
+
+        for otel_span in exporter.spans:
+            attrs = dict(otel_span.attributes)
+
+            if otel_span.name == SpanNames.GUARDRAILS_REQUEST:
+                assert GenAIAttributes.GEN_AI_OPERATION_NAME in attrs
+                assert GenAIAttributes.GEN_AI_PROVIDER_NAME not in attrs
+                assert GenAIAttributes.GEN_AI_SYSTEM not in attrs
+
+
+class TestOpenTelemetryAdapterAsTheBridge:
+    """Test that OpenTelemetryAdapter is a pure API bridge."""
+
+    def test_adapter_handles_span_kind_mapping(self):
+        """Test that adapter correctly maps span.kind string to OTel enum."""
+        from opentelemetry.trace import SpanKind as OTelSpanKind
+
+        from nemoguardrails.tracing import InteractionLog
+        from nemoguardrails.tracing.adapters.opentelemetry import OpenTelemetryAdapter
+
+        # mock provider to capture span creation
+        created_spans = []
+
+        class MockTracer:
+            def start_span(self, name, context=None, start_time=None, kind=None):
+                created_spans.append({"name": name, "kind": kind})
+                return MagicMock()
+
+        provider = MagicMock()
+        provider.get_tracer = MagicMock(return_value=MockTracer())
+
+        with patch("opentelemetry.trace.get_tracer_provider", return_value=provider):
+            adapter = OpenTelemetryAdapter()
+
+        spans = [
+            InteractionSpan(
+                span_id="1",
+                name="server_span",
+                start_time=0.0,
+                end_time=1.0,
+                duration=1.0,
+            ),
+            LLMSpan(
+                span_id="2",
+                name="client_span",
+                start_time=0.0,
+                end_time=1.0,
+                duration=1.0,
+                provider_name="openai",
+                request_model="gpt-4",
+                response_model="gpt-4",
+                operation_name="chat.completions",
+            ),
+            RailSpan(
+                span_id="3",
+                name="internal_span",
+                start_time=0.0,
+                end_time=1.0,
+                duration=1.0,
+                rail_type="input",
+                rail_name="test_rail",
+            ),
+        ]
+
+        interaction_log = InteractionLog(
+            id="test-123",
+            activated_rails=[],
+            trace=spans,
+        )
+
+        adapter.transform(interaction_log)
+
+        # Spans are recorded in the order they appear in the trace.
+        assert created_spans[0]["kind"] == OTelSpanKind.SERVER
+        assert created_spans[1]["kind"] == OTelSpanKind.CLIENT
+        assert created_spans[2]["kind"] == OTelSpanKind.INTERNAL
+
+
+class TestContentPrivacy:
+    """Test that sensitive content is handled according to OTel GenAI conventions."""
+
+    def test_content_not_included_by_default(self):
+        """Test that content is NOT included by default per OTel spec."""
+        events = [
+            {"type": "UserMessage", "text": "My SSN is 123-45-6789"},
+            {
+                "type": "UtteranceBotActionFinished",
+                "final_script": "I cannot process SSN",
+            },
+        ]
+        extractor = SpanExtractorV2(events=events, enable_content_capture=False)
+
+        activated_rail = ActivatedRail(
+            type="action",
+            name="generate",
+            started_at=0.0,
+            finished_at=1.0,
+            duration=1.0,
+            executed_actions=[
+                ExecutedAction(
+                    action_name="generate",
+                    started_at=0.0,
+                    finished_at=1.0,
+                    duration=1.0,
+                    llm_calls=[
+                        LLMCallInfo(
+                            task="general",
+                            prompt="User sensitive prompt",
+                            completion="Bot response with PII",
+                            duration=0.5,
+                            total_tokens=100,
+                            prompt_tokens=50,
+                            completion_tokens=50,
+                            raw_response={"model": "gpt-3.5-turbo"},
+                        )
+                    ],
+                )
+            ],
+        )
+
+        spans = extractor.extract_spans([activated_rail])
+
+        llm_span = next((s for s in spans if isinstance(s, LLMSpan)), None)
+        assert llm_span is not None
+
+        for event in llm_span.events:
+            if event.name in ["gen_ai.content.prompt", "gen_ai.content.completion"]:
+                assert event.body == {}
+                assert "content" not in event.body
+
+    def test_content_included_when_explicitly_enabled(self):
+        """Test that content IS included when explicitly enabled."""
+        # Create extractor with enable_content_capture=True
+        events = [
+            {"type": "UserMessage", "text": "Hello bot"},
+            {"type": "UtteranceBotActionFinished", "final_script": "Hello user"},
+        ]
+        extractor = SpanExtractorV2(events=events, enable_content_capture=True)
+
+        activated_rail = ActivatedRail(
+            type="action",
+            name="generate",
+            started_at=0.0,
+            finished_at=1.0,
+            duration=1.0,
+            executed_actions=[
+                ExecutedAction(
+                    action_name="generate",
+                    started_at=0.0,
+                    finished_at=1.0,
+                    duration=1.0,
+                    llm_calls=[
+                        LLMCallInfo(
+                            task="general",
+                            prompt="Test prompt",
+                            completion="Test response",
+                            duration=0.5,
+                            total_tokens=100,
+                            prompt_tokens=50,
+                            completion_tokens=50,
+                            raw_response={"model": "gpt-3.5-turbo"},
+                        )
+                    ],
+                )
+            ],
+        )
+
+        spans = extractor.extract_spans([activated_rail])
+
+        llm_span = next((s for s in spans if isinstance(s, LLMSpan)), None)
+        assert llm_span is not None
+
+        prompt_event = next(
+            (e for e in llm_span.events if e.name == "gen_ai.content.prompt"), None
+        )
+        assert prompt_event is not None
+        assert prompt_event.body.get("content") == "Test prompt"
+
+        completion_event = next(
+            (e for e in llm_span.events if e.name == "gen_ai.content.completion"), None
+        )
+        assert completion_event is not None
+        assert completion_event.body.get("content") == "Test response"
+
+    def test_conversation_events_respect_privacy_setting(self):
+        """Test that guardrails internal events respect the privacy setting."""
+        events = [
+            {"type": "UserMessage", "text": "Private message"},
+            {
+                "type": "UtteranceBotActionFinished",
+                "final_script": "Private response",
+                "is_success": True,
+            },
+        ]
+
+        extractor_no_content = SpanExtractorV2(
+            events=events, enable_content_capture=False
+        )
+        activated_rail = ActivatedRail(
+            type="dialog", name="main", started_at=0.0, finished_at=1.0, duration=1.0
+        )
+
+        spans = extractor_no_content.extract_spans([activated_rail])
+        interaction_span = spans[0]  # First span is the interaction span
+
+        user_event = next(
+            (e for e in interaction_span.events if e.name == "guardrails.user_message"),
+            None,
+        )
+        assert user_event is not None
+        assert user_event.body["type"] == "UserMessage"
+        assert "content" not in user_event.body
+
+        bot_event = next(
+            (
+                e
+                for e in interaction_span.events
+                if e.name == "guardrails.utterance.bot.finished"
+            ),
+            None,
+        )
+        assert bot_event is not None
+        assert bot_event.body["type"] == "UtteranceBotActionFinished"
+        # Identity check: the flag must be the boolean True, not merely truthy.
+        assert bot_event.body["is_success"] is True
+        assert "content" not in bot_event.body  # Content excluded
+
+        extractor_with_content = SpanExtractorV2(
+            events=events, enable_content_capture=True
+        )
+        spans = extractor_with_content.extract_spans([activated_rail])
+        interaction_span = spans[0]
+
+        user_event = next(
+            (e for e in interaction_span.events if e.name == "guardrails.user_message"),
+            None,
+        )
+        assert user_event is not None
+        assert user_event.body.get("content") == "Private message"
+
+        bot_event = next(
+            (
+                e
+                for e in interaction_span.events
+                if e.name == "guardrails.utterance.bot.finished"
+            ),
+            None,
+        )
+        assert bot_event is not None
+        assert bot_event.body.get("content") == "Private response"
+        assert bot_event.body.get("type") == "UtteranceBotActionFinished"
+        assert bot_event.body.get("is_success") is True
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
diff --git a/tests/test_spans.py b/tests/test_spans.py
new file mode 100644
index 000000000..448f362c7
--- /dev/null
+++ b/tests/test_spans.py
@@ -0,0 +1,98 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# NOTE(review): `pytest` and `is_typed_span` are not referenced anywhere in this
+# new file as shown in the hunk — confirm whether the imports are intentional.
+import pytest
+
+from nemoguardrails.tracing import SpanEvent, SpanFlat
+from nemoguardrails.tracing.spans import LLMSpan, is_typed_span
+
+
+class TestSpanModels:
+    """Test the span models for flat and OpenTelemetry formats."""
+
+    def test_span_flat_creation(self):
+        """Test creating a flat format span."""
+        span = SpanFlat(
+            span_id="test-123",
+            name="test span",
+            start_time=0.0,
+            end_time=1.0,
+            duration=1.0,
+            metrics={"test_metric": 42},
+        )
+
+        assert span.span_id == "test-123"
+        assert span.name == "test span"
+        assert span.duration == 1.0
+        assert span.metrics["test_metric"] == 42
+
+        # Flat spans don't have OpenTelemetry attributes
+        assert not hasattr(span, "attributes")
+        assert not hasattr(span, "events")
+        assert not hasattr(span, "otel_metrics")
+
+    def test_span_opentelemetry_creation(self):
+        """Test creating an OpenTelemetry format span - typed spans with explicit fields."""
+        event = SpanEvent(
+            name="gen_ai.content.prompt", timestamp=0.5, body={"content": "test prompt"}
+        )
+
+        # OpenTelemetry spans are typed with explicit fields
+        span = LLMSpan(
+            span_id="test-456",
+            name="generate_user_intent gpt-4",
+            start_time=0.0,
+            end_time=2.0,
+            duration=2.0,
+            provider_name="openai",
+            request_model="gpt-4",
+            response_model="gpt-4",
+            operation_name="chat.completions",
+            usage_input_tokens=10,
+            usage_output_tokens=20,
+            usage_total_tokens=30,
+            events=[event],
+        )
+
+        assert span.span_id == "test-456"
+        assert span.provider_name == "openai"
+        assert span.request_model == "gpt-4"
+        assert span.usage_input_tokens == 10
+        assert len(span.events) == 1
+        assert span.events[0].name == "gen_ai.content.prompt"
+
+        # The typed fields are expected to surface under the gen_ai.* attribute keys.
+        attributes = span.to_otel_attributes()
+        assert attributes["gen_ai.provider.name"] == "openai"
+        assert attributes["gen_ai.request.model"] == "gpt-4"
+
+    def test_span_flat_model_is_simple(self):
+        """Test that Flat span model is a simple span without OpenTelemetry features."""
+        # NOTE(review): largely overlaps with test_span_flat_creation above.
+        flat_span = SpanFlat(
+            span_id="flat-123",
+            name="test",
+            start_time=0.0,
+            end_time=1.0,
+            duration=1.0,
+            metrics={"metric": 1},
+        )
+
+        assert isinstance(flat_span, SpanFlat)
+        assert flat_span.span_id == "flat-123"
+        assert flat_span.metrics["metric"] == 1
+
+        # Flat spans don't have OpenTelemetry attributes or events
+        assert not hasattr(flat_span, "attributes")
+        assert not hasattr(flat_span, "events")
diff --git a/tests/test_tracing_adapters_filesystem.py b/tests/test_tracing_adapters_filesystem.py index df4a470c9..7c901a6f6 100644 --- a/tests/test_tracing_adapters_filesystem.py +++ b/tests/test_tracing_adapters_filesystem.py @@ -19,10 +19,8 @@ import os import tempfile import unittest -from unittest.mock import MagicMock -from nemoguardrails.eval.models import Span -from nemoguardrails.tracing import InteractionLog +from nemoguardrails.tracing import InteractionLog, SpanFlat from nemoguardrails.tracing.adapters.filesystem import FileSystemAdapter @@ -53,7 +51,7 @@ def test_transform(self): activated_rails=[], events=[], trace=[ - Span( + SpanFlat( name="test_span", span_id="span_1", parent_id=None, @@ -87,7 +85,7 @@ async def run_test(): activated_rails=[], events=[], trace=[ - Span( + SpanFlat( name="test_span", span_id="span_1", parent_id=None, diff --git a/tests/test_tracing_adapters_opentelemetry.py b/tests/test_tracing_adapters_opentelemetry.py index ee1a5a667..455d9c213 100644 --- a/tests/test_tracing_adapters_opentelemetry.py +++ b/tests/test_tracing_adapters_opentelemetry.py @@ -19,23 +19,18 @@ from importlib.metadata import version from unittest.mock import MagicMock, patch -# TODO: check to see if we can add it as a dependency -# but now we try to import opentelemetry and set a flag if it's not available -try: - from opentelemetry.sdk.trace import TracerProvider - from opentelemetry.trace import NoOpTracerProvider +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.trace import NoOpTracerProvider - from nemoguardrails.tracing.adapters.opentelemetry import
OpenTelemetryAdapter +from nemoguardrails.tracing import ( + InteractionLog, + SpanEvent, + SpanFlat, + SpanOpentelemetry, +) +from nemoguardrails.tracing.adapters.opentelemetry import OpenTelemetryAdapter - OPENTELEMETRY_AVAILABLE = True -except ImportError: - OPENTELEMETRY_AVAILABLE = False -from nemoguardrails.eval.models import Span -from nemoguardrails.tracing import InteractionLog - - -@unittest.skipIf(not OPENTELEMETRY_AVAILABLE, "opentelemetry is not available") class TestOpenTelemetryAdapter(unittest.TestCase): def setUp(self): # Set up a mock tracer provider for testing @@ -73,18 +68,21 @@ def test_initialization(self): self.assertEqual(self.adapter.tracer, self.mock_tracer) def test_transform(self): - """Test that transform creates spans correctly.""" + """Test that transform creates spans correctly with proper timing.""" + mock_span = MagicMock() + self.mock_tracer.start_span.return_value = mock_span + interaction_log = InteractionLog( id="test_id", activated_rails=[], events=[], trace=[ - Span( + SpanFlat( name="test_span", span_id="span_1", parent_id=None, - start_time=0.0, - end_time=1.0, + start_time=1234567890.5, # historical timestamp + end_time=1234567891.5, # historical timestamp duration=1.0, metrics={"key": 123}, ) @@ -93,38 +91,50 @@ def test_transform(self): self.adapter.transform(interaction_log) - # Verify that start_as_current_span was called - self.mock_tracer.start_as_current_span.assert_called_once_with( - "test_span", - context=None, - ) - - # We retrieve the mock span instance here - span_instance = ( - self.mock_tracer.start_as_current_span.return_value.__enter__.return_value - ) - - # Verify span attributes were set - span_instance.set_attribute.assert_any_call("key", 123) - span_instance.set_attribute.assert_any_call("span_id", "span_1") - span_instance.set_attribute.assert_any_call("trace_id", "test_id") - span_instance.set_attribute.assert_any_call("start_time", 0.0) - span_instance.set_attribute.assert_any_call("end_time", 1.0) 
- span_instance.set_attribute.assert_any_call("duration", 1.0) + # Verify that start_span was called with proper timing (not start_as_current_span) + call_args = self.mock_tracer.start_span.call_args + self.assertEqual(call_args[0][0], "test_span") # name + self.assertEqual(call_args[1]["context"], None) # no parent context + # Verify start_time is a reasonable absolute timestamp in nanoseconds + start_time_ns = call_args[1]["start_time"] + self.assertIsInstance(start_time_ns, int) + self.assertGreater( + start_time_ns, 1e15 + ) # Should be realistic Unix timestamp in ns + + # V1 span metrics are set directly without prefix + mock_span.set_attribute.assert_any_call("key", 123) + # The adapter no longer sets intrinsic IDs as attributes + # (span_id, trace_id, duration are intrinsic to OTel spans) + + # Verify span was ended with correct end time + end_call_args = mock_span.end.call_args + end_time_ns = end_call_args[1]["end_time"] + self.assertIsInstance(end_time_ns, int) + self.assertGreater(end_time_ns, start_time_ns) # End should be after start + # Verify duration is approximately correct (allowing for conversion precision) + duration_ns = end_time_ns - start_time_ns + expected_duration_ns = int(1.0 * 1_000_000_000) # 1 second + self.assertAlmostEqual( + duration_ns, expected_duration_ns, delta=1000000 + ) # 1ms tolerance def test_transform_span_attributes_various_types(self): """Test that different attribute types are handled correctly.""" + mock_span = MagicMock() + self.mock_tracer.start_span.return_value = mock_span + interaction_log = InteractionLog( id="test_id", activated_rails=[], events=[], trace=[ - Span( + SpanFlat( name="test_span", span_id="span_1", parent_id=None, - start_time=0.0, - end_time=1.0, + start_time=1234567890.0, + end_time=1234567891.0, duration=1.0, metrics={ "int_key": 42, @@ -138,19 +148,17 @@ def test_transform_span_attributes_various_types(self): self.adapter.transform(interaction_log) - span_instance = ( - 
self.mock_tracer.start_as_current_span.return_value.__enter__.return_value - ) - - span_instance.set_attribute.assert_any_call("int_key", 42) - span_instance.set_attribute.assert_any_call("float_key", 3.14) - span_instance.set_attribute.assert_any_call("str_key", 123) - span_instance.set_attribute.assert_any_call("bool_key", 1) - span_instance.set_attribute.assert_any_call("span_id", "span_1") - span_instance.set_attribute.assert_any_call("trace_id", "test_id") - span_instance.set_attribute.assert_any_call("start_time", 0.0) - span_instance.set_attribute.assert_any_call("end_time", 1.0) - span_instance.set_attribute.assert_any_call("duration", 1.0) + mock_span.set_attribute.assert_any_call("int_key", 42) + mock_span.set_attribute.assert_any_call("float_key", 3.14) + mock_span.set_attribute.assert_any_call("str_key", 123) + mock_span.set_attribute.assert_any_call("bool_key", 1) + # The adapter no longer sets intrinsic IDs as attributes + # (span_id, trace_id, duration are intrinsic to OTel spans) + # Verify span was ended + mock_span.end.assert_called_once() + end_call_args = mock_span.end.call_args + self.assertIn("end_time", end_call_args[1]) + self.assertIsInstance(end_call_args[1]["end_time"], int) def test_transform_with_empty_trace(self): """Test transform with empty trace.""" @@ -163,23 +171,23 @@ def test_transform_with_empty_trace(self): self.adapter.transform(interaction_log) - self.mock_tracer.start_as_current_span.assert_not_called() + self.mock_tracer.start_span.assert_not_called() def test_transform_with_tracer_failure(self): """Test transform when tracer fails.""" - self.mock_tracer.start_as_current_span.side_effect = Exception("Tracer failure") + self.mock_tracer.start_span.side_effect = Exception("Tracer failure") interaction_log = InteractionLog( id="test_id", activated_rails=[], events=[], trace=[ - Span( + SpanFlat( name="test_span", span_id="span_1", parent_id=None, - start_time=0.0, - end_time=1.0, + start_time=1234567890.0, + 
end_time=1234567891.0, duration=1.0, metrics={"key": 123}, ) @@ -191,21 +199,101 @@ def test_transform_with_tracer_failure(self): self.assertIn("Tracer failure", str(context.exception)) + def test_transform_with_parent_child_relationships(self): + """Test that parent-child relationships are preserved with correct timing.""" + parent_mock_span = MagicMock() + child_mock_span = MagicMock() + self.mock_tracer.start_span.side_effect = [parent_mock_span, child_mock_span] + + interaction_log = InteractionLog( + id="test_id", + activated_rails=[], + events=[], + trace=[ + SpanFlat( + name="parent_span", + span_id="span_1", + parent_id=None, + start_time=1234567890.0, + end_time=1234567892.0, + duration=2.0, + metrics={"parent_key": 1}, + ), + SpanFlat( + name="child_span", + span_id="span_2", + parent_id="span_1", + start_time=1234567890.5, # child starts after parent + end_time=1234567891.5, # child ends before parent + duration=1.0, + metrics={"child_key": 2}, + ), + ], + ) + + with patch( + "opentelemetry.trace.set_span_in_context" + ) as mock_set_span_in_context: + mock_set_span_in_context.return_value = "parent_context" + + self.adapter.transform(interaction_log) + + # verify parent span created first with no context + self.assertEqual(self.mock_tracer.start_span.call_count, 2) + first_call = self.mock_tracer.start_span.call_args_list[0] + self.assertEqual(first_call[0][0], "parent_span") # name + self.assertEqual(first_call[1]["context"], None) # no parent context + # Verify start_time is a reasonable absolute timestamp + start_time_ns = first_call[1]["start_time"] + self.assertIsInstance(start_time_ns, int) + self.assertGreater( + start_time_ns, 1e15 + ) # Should be realistic Unix timestamp in ns + + # verify child span created with parent context + second_call = self.mock_tracer.start_span.call_args_list[1] + self.assertEqual(second_call[0][0], "child_span") # name + self.assertEqual( + second_call[1]["context"], "parent_context" + ) # parent context + # Verify 
child start_time is also a reasonable absolute timestamp + child_start_time_ns = second_call[1]["start_time"] + self.assertIsInstance(child_start_time_ns, int) + self.assertGreater( + child_start_time_ns, 1e15 + ) # Should be realistic Unix timestamp in ns + + # verify parent context was set correctly + mock_set_span_in_context.assert_called_once_with(parent_mock_span) + + # verify both spans ended with reasonable times + parent_mock_span.end.assert_called_once() + child_mock_span.end.assert_called_once() + parent_end_time = parent_mock_span.end.call_args[1]["end_time"] + child_end_time = child_mock_span.end.call_args[1]["end_time"] + self.assertIsInstance(parent_end_time, int) + self.assertIsInstance(child_end_time, int) + self.assertGreater(parent_end_time, 1e15) + self.assertGreater(child_end_time, 1e15) + def test_transform_async(self): """Test async transform functionality.""" async def run_test(): + mock_span = MagicMock() + self.mock_tracer.start_span.return_value = mock_span + interaction_log = InteractionLog( id="test_id", activated_rails=[], events=[], trace=[ - Span( + SpanFlat( name="test_span", span_id="span_1", parent_id=None, - start_time=0.0, - end_time=1.0, + start_time=1234567890.5, + end_time=1234567891.5, duration=1.0, metrics={"key": 123}, ) @@ -214,22 +302,19 @@ async def run_test(): await self.adapter.transform_async(interaction_log) - self.mock_tracer.start_as_current_span.assert_called_once_with( - "test_span", - context=None, - ) - - # We retrieve the mock span instance here - span_instance = ( - self.mock_tracer.start_as_current_span.return_value.__enter__.return_value - ) + call_args = self.mock_tracer.start_span.call_args + self.assertEqual(call_args[0][0], "test_span") + self.assertEqual(call_args[1]["context"], None) + # Verify start_time is reasonable + self.assertIsInstance(call_args[1]["start_time"], int) + self.assertGreater(call_args[1]["start_time"], 1e15) - span_instance.set_attribute.assert_any_call("key", 123) - 
span_instance.set_attribute.assert_any_call("span_id", "span_1") - span_instance.set_attribute.assert_any_call("trace_id", "test_id") - span_instance.set_attribute.assert_any_call("start_time", 0.0) - span_instance.set_attribute.assert_any_call("end_time", 1.0) - span_instance.set_attribute.assert_any_call("duration", 1.0) + mock_span.set_attribute.assert_any_call("key", 123) + # The adapter no longer sets intrinsic IDs as attributes + # (span_id, trace_id, duration are intrinsic to OTel spans) + mock_span.end.assert_called_once() + self.assertIn("end_time", mock_span.end.call_args[1]) + self.assertIsInstance(mock_span.end.call_args[1]["end_time"], int) asyncio.run(run_test()) @@ -246,13 +331,13 @@ async def run_test(): await self.adapter.transform_async(interaction_log) - self.mock_tracer.start_as_current_span.assert_not_called() + self.mock_tracer.start_span.assert_not_called() asyncio.run(run_test()) def test_transform_async_with_tracer_failure(self): """Test async transform when tracer fails.""" - self.mock_tracer.start_as_current_span.side_effect = Exception("Tracer failure") + self.mock_tracer.start_span.side_effect = Exception("Tracer failure") async def run_test(): interaction_log = InteractionLog( @@ -260,12 +345,12 @@ async def run_test(): activated_rails=[], events=[], trace=[ - Span( + SpanFlat( name="test_span", span_id="span_1", parent_id=None, - start_time=0.0, - end_time=1.0, + start_time=1234567890.0, + end_time=1234567891.0, duration=1.0, metrics={"key": 123}, ) @@ -279,41 +364,6 @@ async def run_test(): asyncio.run(run_test()) - def test_backward_compatibility_with_old_config(self): - """Test that old configuration parameters are still accepted.""" - # This should not fail even if old parameters are passed - adapter = OpenTelemetryAdapter( - service_name="test_service", - exporter="console", # this should be ignored gracefully - resource_attributes={"test": "value"}, # this should be ignored gracefully - ) - - # Should still create the adapter 
successfully - self.assertIsInstance(adapter, OpenTelemetryAdapter) - self.assertEqual(adapter.tracer, self.mock_tracer) - - def test_deprecation_warning_for_old_parameters(self): - """Test that deprecation warnings are raised for old configuration parameters.""" - - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter("always") - - # adapter with deprecated parameters - _adapter = OpenTelemetryAdapter( - service_name="test_service", - exporter="console", - resource_attributes={"test": "value"}, - span_processor=MagicMock(), - ) - - # deprecation warning is issued - self.assertEqual(len(w), 1) - self.assertTrue(issubclass(w[0].category, DeprecationWarning)) - self.assertIn("deprecated", str(w[0].message)) - self.assertIn("exporter", str(w[0].message)) - self.assertIn("resource_attributes", str(w[0].message)) - self.assertIn("span_processor", str(w[0].message)) - def test_no_op_tracer_provider_warning(self): """Test that a warning is issued when NoOpTracerProvider is detected.""" @@ -343,24 +393,72 @@ def test_no_warnings_with_proper_configuration(self): # no warnings is issued self.assertEqual(len(w), 0) - def test_register_otel_exporter_deprecation(self): - """Test that register_otel_exporter shows deprecation warning.""" - from nemoguardrails.tracing.adapters.opentelemetry import register_otel_exporter - - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter("always") + def test_v1_spans_unique_timestamps_regression(self): + """Regression test: V1 spans should have unique timestamps. + + This test ensures the timestamp bug is fixed for V1 spans. + With the bug, all spans would have the same end_time_ns. 
+ """ + created_spans = [] + + def track_span(*args, **kwargs): + span = MagicMock() + created_spans.append(span) + return span + + self.mock_tracer.start_span.side_effect = track_span + + # Create multiple V1 spans with different end times + spans = [] + for i in range(5): + spans.append( + SpanFlat( + name=f"v1_span_{i}", + span_id=str(i), + start_time=float(i * 0.1), # 0, 0.1, 0.2, 0.3, 0.4 + end_time=float(0.5 + i * 0.2), # 0.5, 0.7, 0.9, 1.1, 1.3 + duration=float(0.5 + i * 0.2 - i * 0.1), + metrics={"index": i}, + ) + ) - mock_exporter_cls = MagicMock() + interaction_log = InteractionLog( + id="v1_regression_test", + activated_rails=[], + events=[], + trace=spans, + ) - register_otel_exporter("test-exporter", mock_exporter_cls) + # Use fixed time for predictable results + import time - self.assertEqual(len(w), 1) - self.assertTrue(issubclass(w[0].category, DeprecationWarning)) - self.assertIn("register_otel_exporter is deprecated", str(w[0].message)) - self.assertIn("0.16.0", str(w[0].message)) + with patch("time.time_ns", return_value=8000000000_000_000_000): + self.adapter.transform(interaction_log) - from nemoguardrails.tracing.adapters.opentelemetry import ( - _exporter_name_cls_map, - ) + # Extract all end times + end_times = [] + for span_mock in created_spans: + end_time = span_mock.end.call_args[1]["end_time"] + end_times.append(end_time) + + # CRITICAL: All end times MUST be different + unique_end_times = set(end_times) + self.assertEqual( + len(unique_end_times), + 5, + f"REGRESSION DETECTED: All V1 span end times should be unique! " + f"Got {len(unique_end_times)} unique values from {end_times}. 
" + f"The timestamp calculation bug has regressed.", + ) - self.assertEqual(_exporter_name_cls_map["test-exporter"], mock_exporter_cls) + # Verify expected values + base_ns = 8000000000_000_000_000 + expected_end_times = [ + base_ns + int(0.5 * 1_000_000_000), + base_ns + int(0.7 * 1_000_000_000), + base_ns + int(0.9 * 1_000_000_000), + base_ns + int(1.1 * 1_000_000_000), + base_ns + int(1.3 * 1_000_000_000), + ] + + self.assertEqual(end_times, expected_end_times) From 4849fe1c7ae26795df0b60984f33c6b9d2b8cfc1 Mon Sep 17 00:00:00 2001 From: Pouyanpi <13303554+Pouyanpi@users.noreply.github.com> Date: Thu, 14 Aug 2025 15:20:49 +0200 Subject: [PATCH 02/10] fix: Python 3.9 compatibility for isinstance checks with Union types - Replace isinstance(span, TypedSpan) with explicit tuple of types - TypedSpan is a Union type which cannot be used with isinstance in Python 3.9 - Update test to check for specific LLMSpan type instead of Union - Fixes TypeError: Subscripted generics cannot be used with class and instance checks --- nemoguardrails/tracing/spans.py | 3 ++- tests/test_span_models_and_extractors.py | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/nemoguardrails/tracing/spans.py b/nemoguardrails/tracing/spans.py index 87373bdbe..3a21c3024 100644 --- a/nemoguardrails/tracing/spans.py +++ b/nemoguardrails/tracing/spans.py @@ -351,4 +351,5 @@ def is_typed_span(span: Any) -> bool: Returns: True if the object is a typed span, False otherwise """ - return isinstance(span, TypedSpan) + # Python 3.9 compatibility: cannot use isinstance with Union types + return isinstance(span, (InteractionSpan, RailSpan, ActionSpan, LLMSpan)) diff --git a/tests/test_span_models_and_extractors.py b/tests/test_span_models_and_extractors.py index 3fe0f35d0..71d1f7475 100644 --- a/tests/test_span_models_and_extractors.py +++ b/tests/test_span_models_and_extractors.py @@ -88,7 +88,9 @@ def test_span_v2_creation(self): assert attributes["gen_ai.request.model"] == "gpt-4" 
assert not isinstance(span, SpanFlat) - assert isinstance(span, SpanOpentelemetry) + # Python 3.9 compatibility: cannot use isinstance with Union types + # SpanOpentelemetry is TypedSpan which is a Union, so check the actual type + assert isinstance(span, LLMSpan) # Note: V1 and V2 spans are now fundamentally different types # V1 is a simple span model, V2 is typed spans with explicit fields From 005f3d17a0b82589d3e276c771db64f8a0940b5a Mon Sep 17 00:00:00 2001 From: Pouyanpi <13303554+Pouyanpi@users.noreply.github.com> Date: Fri, 15 Aug 2025 10:26:47 +0200 Subject: [PATCH 03/10] feat: add support for otel span format with filesystem adapter --- nemoguardrails/tracing/adapters/filesystem.py | 89 +++-- tests/test_tracing_adapters_filesystem.py | 333 ++++++++++++++++++ 2 files changed, 396 insertions(+), 26 deletions(-) diff --git a/nemoguardrails/tracing/adapters/filesystem.py b/nemoguardrails/tracing/adapters/filesystem.py index 3e99398b8..3f647a0cb 100644 --- a/nemoguardrails/tracing/adapters/filesystem.py +++ b/nemoguardrails/tracing/adapters/filesystem.py @@ -18,7 +18,7 @@ import json import os -from typing import TYPE_CHECKING, Optional +from typing import TYPE_CHECKING, Any, Dict, Optional if TYPE_CHECKING: from nemoguardrails.tracing import InteractionLog @@ -28,6 +28,7 @@ class FileSystemAdapter(InteractionLogAdapter): name = "FileSystem" + SCHEMA_VERSION = "2.0" def __init__(self, filepath: Optional[str] = None): if not filepath: @@ -36,58 +37,94 @@ def __init__(self, filepath: Optional[str] = None): self.filepath = os.path.abspath(filepath) os.makedirs(os.path.dirname(self.filepath), exist_ok=True) + def _extract_span_data(self, span_data) -> Dict[str, Any]: + """Extract all available data from a span.""" + # Start with common fields that all spans have + span_dict = { + "name": span_data.name, + "span_id": span_data.span_id, + "parent_id": span_data.parent_id, + "start_time": span_data.start_time, + "end_time": span_data.end_time, + "duration": 
span_data.duration, + } + + # Add span type for debugging + span_dict["span_type"] = span_data.__class__.__name__ + + # Handle v1 spans (SpanFlat) - they have metrics + if hasattr(span_data, "metrics") and span_data.metrics: + span_dict["metrics"] = span_data.metrics + + # Handle v2 spans - they have span_kind + if hasattr(span_data, "span_kind"): + span_dict["span_kind"] = span_data.span_kind + + # Extract events if present + if hasattr(span_data, "events") and span_data.events: + span_dict["events"] = [ + { + "name": event.name, + "timestamp": event.timestamp, + "attributes": event.attributes, + } + for event in span_data.events + ] + + # Extract error information if present + if hasattr(span_data, "error") and span_data.error: + span_dict["error"] = { + "occurred": span_data.error, + "type": getattr(span_data, "error_type", None), + "message": getattr(span_data, "error_message", None), + } + + # Extract OpenTelemetry attributes if available + if hasattr(span_data, "to_otel_attributes"): + span_dict["attributes"] = span_data.to_otel_attributes() + + # Include custom attributes if present + if hasattr(span_data, "custom_attributes") and span_data.custom_attributes: + span_dict["custom_attributes"] = span_data.custom_attributes + + return span_dict + def transform(self, interaction_log: "InteractionLog"): """Transforms the InteractionLog into a JSON string.""" spans = [] for span_data in interaction_log.trace: - span_dict = { - "name": span_data.name, - "span_id": span_data.span_id, - "parent_id": span_data.parent_id, - "trace_id": interaction_log.id, - "start_time": span_data.start_time, - "end_time": span_data.end_time, - "duration": span_data.duration, - "metrics": span_data.metrics, - } + span_dict = self._extract_span_data(span_data) spans.append(span_dict) log_dict = { + "schema_version": self.SCHEMA_VERSION, "trace_id": interaction_log.id, "spans": spans, } - with open(self.filepath, "a") as f: - f.write(json.dumps(log_dict, indent=2) + "\n") + with 
open(self.filepath, "a", encoding="utf-8") as f: + f.write(json.dumps(log_dict) + "\n") async def transform_async(self, interaction_log: "InteractionLog"): try: import aiofiles except ImportError: raise ImportError( - "aiofiles is required for async file writing. Please install it using `pip install aiofiles" + "aiofiles is required for async file writing. Please install it using `pip install aiofiles`" ) spans = [] for span_data in interaction_log.trace: - span_dict = { - "name": span_data.name, - "span_id": span_data.span_id, - "parent_id": span_data.parent_id, - "trace_id": interaction_log.id, - "start_time": span_data.start_time, - "end_time": span_data.end_time, - "duration": span_data.duration, - "metrics": span_data.metrics, - } + span_dict = self._extract_span_data(span_data) spans.append(span_dict) log_dict = { + "schema_version": self.SCHEMA_VERSION, "trace_id": interaction_log.id, "spans": spans, } - async with aiofiles.open(self.filepath, "a") as f: - await f.write(json.dumps(log_dict, indent=2) + "\n") + async with aiofiles.open(self.filepath, "a", encoding="utf-8") as f: + await f.write(json.dumps(log_dict) + "\n") diff --git a/tests/test_tracing_adapters_filesystem.py b/tests/test_tracing_adapters_filesystem.py index 7c901a6f6..d79b14c4d 100644 --- a/tests/test_tracing_adapters_filesystem.py +++ b/tests/test_tracing_adapters_filesystem.py @@ -22,6 +22,13 @@ from nemoguardrails.tracing import InteractionLog, SpanFlat from nemoguardrails.tracing.adapters.filesystem import FileSystemAdapter +from nemoguardrails.tracing.spans import ( + ActionSpan, + InteractionSpan, + LLMSpan, + RailSpan, + SpanEvent, +) class TestFileSystemAdapter(unittest.TestCase): @@ -107,3 +114,329 @@ async def run_test(): self.assertEqual(log_dict["spans"][0]["name"], "test_span") asyncio.run(run_test()) + + def test_schema_version(self): + adapter = FileSystemAdapter(filepath=self.filepath) + interaction_log = InteractionLog( + id="test_id", + activated_rails=[], + events=[], + 
trace=[ + SpanFlat( + name="test_span", + span_id="span_1", + parent_id=None, + start_time=0.0, + end_time=1.0, + duration=1.0, + metrics={}, + ) + ], + ) + adapter.transform(interaction_log) + + with open(self.filepath, "r") as f: + content = f.read() + log_dict = json.loads(content.strip()) + self.assertEqual(log_dict["schema_version"], "2.0") + + def test_span_flat_with_metrics(self): + adapter = FileSystemAdapter(filepath=self.filepath) + interaction_log = InteractionLog( + id="test_trace", + activated_rails=[], + events=[], + trace=[ + SpanFlat( + name="llm_call", + span_id="span_1", + parent_id=None, + start_time=0.0, + end_time=1.5, + duration=1.5, + metrics={ + "input_tokens": 10, + "output_tokens": 20, + "total_tokens": 30, + }, + ) + ], + ) + adapter.transform(interaction_log) + + with open(self.filepath, "r") as f: + content = f.read() + log_dict = json.loads(content.strip()) + span = log_dict["spans"][0] + self.assertEqual(span["span_type"], "SpanFlat") + self.assertIn("metrics", span) + self.assertEqual(span["metrics"]["input_tokens"], 10) + self.assertEqual(span["metrics"]["output_tokens"], 20) + self.assertEqual(span["metrics"]["total_tokens"], 30) + + def test_interaction_span_with_events(self): + adapter = FileSystemAdapter(filepath=self.filepath) + events = [ + SpanEvent( + name="gen_ai.content.prompt", + timestamp=0.1, + attributes={"gen_ai.prompt": "Hello, how are you?"}, + ), + SpanEvent( + name="gen_ai.content.completion", + timestamp=1.9, + attributes={"gen_ai.completion": "I'm doing well, thank you!"}, + ), + ] + interaction_log = InteractionLog( + id="test_trace", + activated_rails=[], + events=[], + trace=[ + InteractionSpan( + name="interaction", + span_id="span_1", + parent_id=None, + start_time=0.0, + end_time=2.0, + duration=2.0, + span_kind="server", + request_model="gpt-4", + events=events, + ) + ], + ) + adapter.transform(interaction_log) + + with open(self.filepath, "r") as f: + content = f.read() + log_dict = 
json.loads(content.strip()) + span = log_dict["spans"][0] + self.assertEqual(span["span_type"], "InteractionSpan") + self.assertEqual(span["span_kind"], "server") + self.assertIn("events", span) + self.assertEqual(len(span["events"]), 2) + self.assertEqual(span["events"][0]["name"], "gen_ai.content.prompt") + self.assertEqual(span["events"][0]["timestamp"], 0.1) + self.assertIn("attributes", span) + self.assertIn("gen_ai.operation.name", span["attributes"]) + + def test_rail_span_with_attributes(self): + adapter = FileSystemAdapter(filepath=self.filepath) + interaction_log = InteractionLog( + id="test_trace", + activated_rails=[], + events=[], + trace=[ + RailSpan( + name="check_jailbreak", + span_id="span_1", + parent_id="parent_span", + start_time=0.5, + end_time=1.0, + duration=0.5, + span_kind="internal", + rail_type="input", + rail_name="check_jailbreak", + rail_stop=False, + rail_decisions=["allow"], + ) + ], + ) + adapter.transform(interaction_log) + + with open(self.filepath, "r") as f: + content = f.read() + log_dict = json.loads(content.strip()) + span = log_dict["spans"][0] + self.assertEqual(span["span_type"], "RailSpan") + self.assertEqual(span["span_kind"], "internal") + self.assertEqual(span["parent_id"], "parent_span") + self.assertIn("attributes", span) + self.assertEqual(span["attributes"]["rail.type"], "input") + self.assertEqual(span["attributes"]["rail.name"], "check_jailbreak") + self.assertEqual(span["attributes"]["rail.stop"], False) + self.assertEqual(span["attributes"]["rail.decisions"], ["allow"]) + + def test_action_span_with_error(self): + adapter = FileSystemAdapter(filepath=self.filepath) + interaction_log = InteractionLog( + id="test_trace", + activated_rails=[], + events=[], + trace=[ + ActionSpan( + name="execute_action", + span_id="span_1", + parent_id=None, + start_time=0.0, + end_time=0.5, + duration=0.5, + span_kind="internal", + action_name="fetch_data", + action_params={"url": "https://api.example.com"}, + error=True, + 
error_type="ConnectionError", + error_message="Failed to connect to API", + ) + ], + ) + adapter.transform(interaction_log) + + with open(self.filepath, "r") as f: + content = f.read() + log_dict = json.loads(content.strip()) + span = log_dict["spans"][0] + self.assertEqual(span["span_type"], "ActionSpan") + self.assertIn("error", span) + self.assertEqual(span["error"]["occurred"], True) + self.assertEqual(span["error"]["type"], "ConnectionError") + self.assertEqual(span["error"]["message"], "Failed to connect to API") + self.assertIn("attributes", span) + self.assertEqual(span["attributes"]["action.name"], "fetch_data") + + def test_llm_span_with_custom_attributes(self): + adapter = FileSystemAdapter(filepath=self.filepath) + interaction_log = InteractionLog( + id="test_trace", + activated_rails=[], + events=[], + trace=[ + LLMSpan( + name="llm_api_call", + span_id="span_1", + parent_id=None, + start_time=0.0, + end_time=1.0, + duration=1.0, + span_kind="client", + provider_name="openai", + operation_name="chat.completions", + request_model="gpt-4", + temperature=0.7, + response_model="gpt-4-0613", + usage_input_tokens=50, + usage_output_tokens=100, + custom_attributes={"custom_key": "custom_value"}, + ) + ], + ) + adapter.transform(interaction_log) + + with open(self.filepath, "r") as f: + content = f.read() + log_dict = json.loads(content.strip()) + span = log_dict["spans"][0] + self.assertEqual(span["span_type"], "LLMSpan") + self.assertEqual(span["span_kind"], "client") + self.assertIn("attributes", span) + self.assertEqual(span["attributes"]["gen_ai.request.model"], "gpt-4") + self.assertEqual(span["attributes"]["gen_ai.request.temperature"], 0.7) + self.assertEqual(span["attributes"]["gen_ai.response.model"], "gpt-4-0613") + self.assertEqual(span["attributes"]["gen_ai.usage.input_tokens"], 50) + self.assertEqual(span["attributes"]["gen_ai.usage.output_tokens"], 100) + self.assertIn("custom_attributes", span) + 
self.assertEqual(span["custom_attributes"]["custom_key"], "custom_value") + + def test_mixed_span_types(self): + adapter = FileSystemAdapter(filepath=self.filepath) + interaction_log = InteractionLog( + id="test_mixed", + activated_rails=[], + events=[], + trace=[ + InteractionSpan( + name="interaction", + span_id="span_1", + parent_id=None, + start_time=0.0, + end_time=3.0, + duration=3.0, + span_kind="server", + request_model="gpt-4", + ), + RailSpan( + name="check_jailbreak", + span_id="span_2", + parent_id="span_1", + start_time=0.5, + end_time=1.0, + duration=0.5, + span_kind="internal", + rail_type="input", + rail_name="check_jailbreak", + rail_stop=False, + ), + SpanFlat( + name="legacy_span", + span_id="span_3", + parent_id="span_1", + start_time=1.5, + end_time=2.5, + duration=1.0, + metrics={"tokens": 25}, + ), + ], + ) + adapter.transform(interaction_log) + + with open(self.filepath, "r") as f: + content = f.read() + log_dict = json.loads(content.strip()) + self.assertEqual(len(log_dict["spans"]), 3) + + self.assertEqual(log_dict["spans"][0]["span_type"], "InteractionSpan") + self.assertIn("span_kind", log_dict["spans"][0]) + self.assertIn("attributes", log_dict["spans"][0]) + + self.assertEqual(log_dict["spans"][1]["span_type"], "RailSpan") + self.assertEqual(log_dict["spans"][1]["parent_id"], "span_1") + + self.assertEqual(log_dict["spans"][2]["span_type"], "SpanFlat") + self.assertIn("metrics", log_dict["spans"][2]) + self.assertNotIn("span_kind", log_dict["spans"][2]) + + @unittest.skipIf( + importlib.util.find_spec("aiofiles") is None, "aiofiles is not installed" + ) + def test_transform_async_with_otel_spans(self): + async def run_test(): + adapter = FileSystemAdapter(filepath=self.filepath) + interaction_log = InteractionLog( + id="test_async_otel", + activated_rails=[], + events=[], + trace=[ + InteractionSpan( + name="interaction", + span_id="span_1", + parent_id=None, + start_time=0.0, + end_time=2.0, + duration=2.0, + span_kind="server", + 
request_model="gpt-4", + events=[ + SpanEvent( + name="test_event", + timestamp=1.0, + attributes={"key": "value"}, + ) + ], + ) + ], + ) + + await adapter.transform_async(interaction_log) + + with open(self.filepath, "r") as f: + content = f.read() + log_dict = json.loads(content.strip()) + self.assertEqual(log_dict["schema_version"], "2.0") + self.assertEqual(log_dict["trace_id"], "test_async_otel") + span = log_dict["spans"][0] + self.assertEqual(span["span_type"], "InteractionSpan") + self.assertIn("events", span) + self.assertEqual(len(span["events"]), 1) + + asyncio.run(run_test()) From daffaf5e6b1d3735d65af933f8302318fb2d08e8 Mon Sep 17 00:00:00 2001 From: Pouyanpi <13303554+Pouyanpi@users.noreply.github.com> Date: Fri, 15 Aug 2025 11:56:34 +0200 Subject: [PATCH 04/10] restructure tests --- .../adapters/test_filesystem.py} | 0 .../adapters/test_opentelemetry.py} | 0 .../adapters/test_opentelemetry_v2.py} | 0 .../spans}/test_span_extractors.py | 4 +- .../spans}/test_span_format_enum.py | 0 .../spans}/test_span_models_and_extractors.py | 4 +- .../spans}/test_span_v2_integration.py | 4 +- .../spans}/test_span_v2_otel_semantics.py | 0 tests/{ => tracing/spans}/test_spans.py | 2 +- tests/tracing/test_span_formatting.py | 276 ++++++++++++++++++ tests/{ => tracing}/test_tracing.py | 0 11 files changed, 283 insertions(+), 7 deletions(-) rename tests/{test_tracing_adapters_filesystem.py => tracing/adapters/test_filesystem.py} (100%) rename tests/{test_tracing_adapters_opentelemetry.py => tracing/adapters/test_opentelemetry.py} (100%) rename tests/{test_opentelemetry_adapter_v2.py => tracing/adapters/test_opentelemetry_v2.py} (100%) rename tests/{ => tracing/spans}/test_span_extractors.py (98%) rename tests/{ => tracing/spans}/test_span_format_enum.py (100%) rename tests/{ => tracing/spans}/test_span_models_and_extractors.py (98%) rename tests/{ => tracing/spans}/test_span_v2_integration.py (97%) rename tests/{ => tracing/spans}/test_span_v2_otel_semantics.py (100%) 
rename tests/{ => tracing/spans}/test_spans.py (97%) create mode 100644 tests/tracing/test_span_formatting.py rename tests/{ => tracing}/test_tracing.py (100%) diff --git a/tests/test_tracing_adapters_filesystem.py b/tests/tracing/adapters/test_filesystem.py similarity index 100% rename from tests/test_tracing_adapters_filesystem.py rename to tests/tracing/adapters/test_filesystem.py diff --git a/tests/test_tracing_adapters_opentelemetry.py b/tests/tracing/adapters/test_opentelemetry.py similarity index 100% rename from tests/test_tracing_adapters_opentelemetry.py rename to tests/tracing/adapters/test_opentelemetry.py diff --git a/tests/test_opentelemetry_adapter_v2.py b/tests/tracing/adapters/test_opentelemetry_v2.py similarity index 100% rename from tests/test_opentelemetry_adapter_v2.py rename to tests/tracing/adapters/test_opentelemetry_v2.py diff --git a/tests/test_span_extractors.py b/tests/tracing/spans/test_span_extractors.py similarity index 98% rename from tests/test_span_extractors.py rename to tests/tracing/spans/test_span_extractors.py index 709b9e61e..e98546a64 100644 --- a/tests/test_span_extractors.py +++ b/tests/tracing/spans/test_span_extractors.py @@ -25,7 +25,7 @@ SpanFlat, create_span_extractor, ) -from nemoguardrails.tracing.spans import LLMSpan, is_typed_span +from nemoguardrails.tracing.spans import LLMSpan, is_opentelemetry_span class TestSpanExtractors: @@ -95,7 +95,7 @@ def test_span_extractor_opentelemetry_attributes(self, test_data): # All spans should be typed spans for span in spans: - assert is_typed_span(span) + assert is_opentelemetry_span(span) # LLM spans follow OpenTelemetry convention: "{operation} {model}" llm_span = next(s for s in spans if s.name == "generate_user_intent gpt-4") diff --git a/tests/test_span_format_enum.py b/tests/tracing/spans/test_span_format_enum.py similarity index 100% rename from tests/test_span_format_enum.py rename to tests/tracing/spans/test_span_format_enum.py diff --git 
a/tests/test_span_models_and_extractors.py b/tests/tracing/spans/test_span_models_and_extractors.py similarity index 98% rename from tests/test_span_models_and_extractors.py rename to tests/tracing/spans/test_span_models_and_extractors.py index 71d1f7475..77ddc7aa7 100644 --- a/tests/test_span_models_and_extractors.py +++ b/tests/tracing/spans/test_span_models_and_extractors.py @@ -27,7 +27,7 @@ SpanOpentelemetry, create_span_extractor, ) -from nemoguardrails.tracing.spans import LLMSpan, is_typed_span +from nemoguardrails.tracing.spans import LLMSpan, is_opentelemetry_span class TestSpanModels: @@ -164,7 +164,7 @@ def test_span_extractor_v2_attributes(self, test_data): for span in spans: # Now we expect typed spans - assert is_typed_span(span) + assert is_opentelemetry_span(span) # In V2, LLM spans follow OpenTelemetry convention: "{operation} {model}" llm_span = next(s for s in spans if s.name == "generate_user_intent gpt-4") diff --git a/tests/test_span_v2_integration.py b/tests/tracing/spans/test_span_v2_integration.py similarity index 97% rename from tests/test_span_v2_integration.py rename to tests/tracing/spans/test_span_v2_integration.py index e76e2003c..f217e3878 100644 --- a/tests/test_span_v2_integration.py +++ b/tests/tracing/spans/test_span_v2_integration.py @@ -18,7 +18,7 @@ from nemoguardrails import LLMRails, RailsConfig from nemoguardrails.rails.llm.options import GenerationOptions from nemoguardrails.tracing import SpanOpentelemetry, create_span_extractor -from nemoguardrails.tracing.spans import LLMSpan, is_typed_span +from nemoguardrails.tracing.spans import LLMSpan, is_opentelemetry_span from tests.utils import FakeLLM @@ -113,7 +113,7 @@ async def test_v2_spans_generated_with_events(v2_config): assert len(interaction_log.trace) > 0 for span in interaction_log.trace: - assert is_typed_span(span) + assert is_opentelemetry_span(span) interaction_span = next( (s for s in interaction_log.trace if s.name == "guardrails.request"), None diff --git 
a/tests/test_span_v2_otel_semantics.py b/tests/tracing/spans/test_span_v2_otel_semantics.py similarity index 100% rename from tests/test_span_v2_otel_semantics.py rename to tests/tracing/spans/test_span_v2_otel_semantics.py diff --git a/tests/test_spans.py b/tests/tracing/spans/test_spans.py similarity index 97% rename from tests/test_spans.py rename to tests/tracing/spans/test_spans.py index 448f362c7..75a56862e 100644 --- a/tests/test_spans.py +++ b/tests/tracing/spans/test_spans.py @@ -17,7 +17,7 @@ import pytest from nemoguardrails.tracing import SpanEvent, SpanFlat -from nemoguardrails.tracing.spans import LLMSpan, is_typed_span +from nemoguardrails.tracing.spans import LLMSpan, is_opentelemetry_span class TestSpanModels: diff --git a/tests/tracing/test_span_formatting.py b/tests/tracing/test_span_formatting.py new file mode 100644 index 000000000..bea842b00 --- /dev/null +++ b/tests/tracing/test_span_formatting.py @@ -0,0 +1,276 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pytest + +from nemoguardrails.tracing.span_formatting import ( + extract_span_attributes, + format_span_for_filesystem, +) +from nemoguardrails.tracing.spans import ( + ActionSpan, + InteractionSpan, + LLMSpan, + RailSpan, + SpanEvent, + SpanFlat, +) + + +class TestFormatSpanForFilesystem: + def test_format_flat_span_with_metrics(self): + span = SpanFlat( + name="llm_call", + span_id="span_1", + parent_id="parent_1", + start_time=0.5, + end_time=1.5, + duration=1.0, + metrics={"input_tokens": 10, "output_tokens": 20}, + ) + + result = format_span_for_filesystem(span) + + assert result["name"] == "llm_call" + assert result["span_id"] == "span_1" + assert result["parent_id"] == "parent_1" + assert result["start_time"] == 0.5 + assert result["end_time"] == 1.5 + assert result["duration"] == 1.0 + assert result["span_type"] == "SpanFlat" + assert result["metrics"] == {"input_tokens": 10, "output_tokens": 20} + assert "span_kind" not in result + assert "attributes" not in result + + def test_format_flat_span_without_metrics(self): + span = SpanFlat( + name="test", + span_id="span_1", + parent_id=None, + start_time=0.0, + end_time=1.0, + duration=1.0, + metrics={}, + ) + + result = format_span_for_filesystem(span) + + assert result["span_type"] == "SpanFlat" + assert "metrics" not in result + + def test_format_interaction_span(self): + span = InteractionSpan( + name="interaction", + span_id="span_1", + parent_id=None, + start_time=0.0, + end_time=2.0, + duration=2.0, + span_kind="server", + request_model="gpt-4", + ) + + result = format_span_for_filesystem(span) + + assert result["span_type"] == "InteractionSpan" + assert result["span_kind"] == "server" + assert "attributes" in result + assert result["attributes"]["gen_ai.operation.name"] == "guardrails" + + def test_format_span_with_events(self): + events = [ + SpanEvent( + name="test_event", + timestamp=0.5, + attributes={"key": "value"}, + ) + ] + span = InteractionSpan( + name="interaction", + 
span_id="span_1", + parent_id=None, + start_time=0.0, + end_time=1.0, + duration=1.0, + span_kind="server", + events=events, + ) + + result = format_span_for_filesystem(span) + + assert "events" in result + assert len(result["events"]) == 1 + assert result["events"][0]["name"] == "test_event" + assert result["events"][0]["timestamp"] == 0.5 + assert result["events"][0]["attributes"] == {"key": "value"} + + def test_format_span_with_error(self): + span = ActionSpan( + name="action", + span_id="span_1", + parent_id=None, + start_time=0.0, + end_time=1.0, + duration=1.0, + span_kind="internal", + action_name="fetch", + error=True, + error_type="ConnectionError", + error_message="Failed", + ) + + result = format_span_for_filesystem(span) + + assert "error" in result + assert result["error"]["occurred"] is True + assert result["error"]["type"] == "ConnectionError" + assert result["error"]["message"] == "Failed" + + def test_format_span_with_custom_attributes(self): + span = LLMSpan( + name="llm", + span_id="span_1", + parent_id=None, + start_time=0.0, + end_time=1.0, + duration=1.0, + span_kind="client", + provider_name="openai", + operation_name="chat.completions", + request_model="gpt-4", + response_model="gpt-4", + custom_attributes={"custom": "value"}, + ) + + result = format_span_for_filesystem(span) + + assert "custom_attributes" in result + assert result["custom_attributes"] == {"custom": "value"} + + def test_format_unknown_span_type_raises(self): + class UnknownSpan: + def __init__(self): + self.name = "unknown" + + with pytest.raises(ValueError) as exc_info: + format_span_for_filesystem(UnknownSpan()) + + assert "Unknown span type: UnknownSpan" in str(exc_info.value) + assert "Only SpanFlat and typed spans are supported" in str(exc_info.value) + + +class TestExtractSpanAttributes: + def test_extract_from_flat_span_with_metrics(self): + span = SpanFlat( + name="test", + span_id="1", + parent_id=None, + start_time=0.0, + end_time=1.0, + duration=1.0, + 
metrics={"tokens": 100, "latency": 0.5}, + ) + + attrs = extract_span_attributes(span) + + assert attrs == {"tokens": 100, "latency": 0.5} + assert attrs is not span.metrics + + def test_extract_from_flat_span_without_metrics(self): + span = SpanFlat( + name="test", + span_id="1", + parent_id=None, + start_time=0.0, + end_time=1.0, + duration=1.0, + metrics={}, + ) + + attrs = extract_span_attributes(span) + + assert attrs == {} + + def test_extract_from_interaction_span(self): + span = InteractionSpan( + name="interaction", + span_id="1", + parent_id=None, + start_time=0.0, + end_time=1.0, + duration=1.0, + span_kind="server", + request_model="gpt-4", + ) + + attrs = extract_span_attributes(span) + + assert "span.kind" in attrs + assert attrs["span.kind"] == "server" + assert "gen_ai.operation.name" in attrs + + def test_extract_from_rail_span(self): + span = RailSpan( + name="check", + span_id="1", + parent_id=None, + start_time=0.0, + end_time=1.0, + duration=1.0, + span_kind="internal", + rail_type="input", + rail_name="check_jailbreak", + rail_stop=False, + ) + + attrs = extract_span_attributes(span) + + assert attrs["rail.type"] == "input" + assert attrs["rail.name"] == "check_jailbreak" + assert attrs["rail.stop"] is False + + def test_extract_from_llm_span(self): + span = LLMSpan( + name="llm", + span_id="1", + parent_id=None, + start_time=0.0, + end_time=1.0, + duration=1.0, + span_kind="client", + provider_name="openai", + operation_name="chat.completions", + request_model="gpt-4", + response_model="gpt-4", + temperature=0.7, + usage_input_tokens=50, + usage_output_tokens=100, + ) + + attrs = extract_span_attributes(span) + + assert attrs["gen_ai.request.model"] == "gpt-4" + assert attrs["gen_ai.request.temperature"] == 0.7 + assert attrs["gen_ai.usage.input_tokens"] == 50 + assert attrs["gen_ai.usage.output_tokens"] == 100 + + def test_extract_unknown_span_type_raises(self): + class UnknownSpan: + pass + + with pytest.raises(ValueError) as exc_info: + 
extract_span_attributes(UnknownSpan()) + + assert "Unknown span type: UnknownSpan" in str(exc_info.value) diff --git a/tests/test_tracing.py b/tests/tracing/test_tracing.py similarity index 100% rename from tests/test_tracing.py rename to tests/tracing/test_tracing.py From 93e77958330fbaca4d3c30171f337b914f456885 Mon Sep 17 00:00:00 2001 From: Pouyanpi <13303554+Pouyanpi@users.noreply.github.com> Date: Fri, 15 Aug 2025 12:00:59 +0200 Subject: [PATCH 05/10] refactor(tracing): unify span formatting and attribute extraction --- nemoguardrails/tracing/adapters/filesystem.py | 59 +---------- .../tracing/adapters/opentelemetry.py | 18 +--- nemoguardrails/tracing/span_formatting.py | 98 +++++++++++++++++++ nemoguardrails/tracing/spans.py | 42 +++----- 4 files changed, 121 insertions(+), 96 deletions(-) create mode 100644 nemoguardrails/tracing/span_formatting.py diff --git a/nemoguardrails/tracing/adapters/filesystem.py b/nemoguardrails/tracing/adapters/filesystem.py index 3f647a0cb..5ffb61ffa 100644 --- a/nemoguardrails/tracing/adapters/filesystem.py +++ b/nemoguardrails/tracing/adapters/filesystem.py @@ -18,12 +18,13 @@ import json import os -from typing import TYPE_CHECKING, Any, Dict, Optional +from typing import TYPE_CHECKING, Optional if TYPE_CHECKING: from nemoguardrails.tracing import InteractionLog from nemoguardrails.tracing.adapters.base import InteractionLogAdapter +from nemoguardrails.tracing.span_formatting import format_span_for_filesystem class FileSystemAdapter(InteractionLogAdapter): @@ -37,64 +38,12 @@ def __init__(self, filepath: Optional[str] = None): self.filepath = os.path.abspath(filepath) os.makedirs(os.path.dirname(self.filepath), exist_ok=True) - def _extract_span_data(self, span_data) -> Dict[str, Any]: - """Extract all available data from a span.""" - # Start with common fields that all spans have - span_dict = { - "name": span_data.name, - "span_id": span_data.span_id, - "parent_id": span_data.parent_id, - "start_time": span_data.start_time, 
- "end_time": span_data.end_time, - "duration": span_data.duration, - } - - # Add span type for debugging - span_dict["span_type"] = span_data.__class__.__name__ - - # Handle v1 spans (SpanFlat) - they have metrics - if hasattr(span_data, "metrics") and span_data.metrics: - span_dict["metrics"] = span_data.metrics - - # Handle v2 spans - they have span_kind - if hasattr(span_data, "span_kind"): - span_dict["span_kind"] = span_data.span_kind - - # Extract events if present - if hasattr(span_data, "events") and span_data.events: - span_dict["events"] = [ - { - "name": event.name, - "timestamp": event.timestamp, - "attributes": event.attributes, - } - for event in span_data.events - ] - - # Extract error information if present - if hasattr(span_data, "error") and span_data.error: - span_dict["error"] = { - "occurred": span_data.error, - "type": getattr(span_data, "error_type", None), - "message": getattr(span_data, "error_message", None), - } - - # Extract OpenTelemetry attributes if available - if hasattr(span_data, "to_otel_attributes"): - span_dict["attributes"] = span_data.to_otel_attributes() - - # Include custom attributes if present - if hasattr(span_data, "custom_attributes") and span_data.custom_attributes: - span_dict["custom_attributes"] = span_data.custom_attributes - - return span_dict - def transform(self, interaction_log: "InteractionLog"): """Transforms the InteractionLog into a JSON string.""" spans = [] for span_data in interaction_log.trace: - span_dict = self._extract_span_data(span_data) + span_dict = format_span_for_filesystem(span_data) spans.append(span_dict) log_dict = { @@ -117,7 +66,7 @@ async def transform_async(self, interaction_log: "InteractionLog"): spans = [] for span_data in interaction_log.trace: - span_dict = self._extract_span_data(span_data) + span_dict = format_span_for_filesystem(span_data) spans.append(span_dict) log_dict = { diff --git a/nemoguardrails/tracing/adapters/opentelemetry.py 
b/nemoguardrails/tracing/adapters/opentelemetry.py index 3dbdd7603..00456954c 100644 --- a/nemoguardrails/tracing/adapters/opentelemetry.py +++ b/nemoguardrails/tracing/adapters/opentelemetry.py @@ -70,7 +70,7 @@ ) from nemoguardrails.tracing.adapters.base import InteractionLogAdapter -from nemoguardrails.tracing.spans import is_typed_span +from nemoguardrails.tracing.span_formatting import extract_span_attributes class OpenTelemetryAdapter(InteractionLogAdapter): @@ -169,10 +169,10 @@ def _create_span( spans, base_time_ns, ): - """Create OTel span from a fully-formed SpanOpentelemetry or typed span object. + """Create OTel span from a span. - This is a pure API bridge - all semantic attributes are already - set by the extractor. We only handle: + This is a pure API bridge - all semantic attributes are extracted + by the formatting function. We only handle: 1. Timestamp conversion (relative to absolute) 2. Span kind mapping (string to enum) 3. API calls to create spans and events @@ -187,10 +187,7 @@ def _create_span( start_time_ns = base_time_ns + relative_start_ns end_time_ns = base_time_ns + relative_end_ns - if is_typed_span(span_data): - attributes = span_data.to_otel_attributes() - else: - attributes = {} + attributes = extract_span_attributes(span_data) from opentelemetry.trace import SpanKind as OTelSpanKind @@ -216,11 +213,6 @@ def _create_span( continue span.set_attribute(key, value) - # for V1 compatibility, also set metrics as attributes - if hasattr(span_data, "metrics") and span_data.metrics: - for key, value in span_data.metrics.items(): - span.set_attribute(key, value) - if hasattr(span_data, "events") and span_data.events: for event in span_data.events: relative_event_ns = int(event.timestamp * 1_000_000_000) diff --git a/nemoguardrails/tracing/span_formatting.py b/nemoguardrails/tracing/span_formatting.py new file mode 100644 index 000000000..c3f814e8a --- /dev/null +++ b/nemoguardrails/tracing/span_formatting.py @@ -0,0 +1,98 @@ +# 
SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Simple span formatting functions for different output formats.""" + +from typing import Any, Dict + +from nemoguardrails.tracing.spans import SpanFlat, is_opentelemetry_span + + +def format_span_for_filesystem(span) -> Dict[str, Any]: + """Format any span type for JSON filesystem storage. + + Args: + span: Either SpanFlat or typed span (InteractionSpan, RailSpan, etc.) + + Returns: + Dictionary with all span data for JSON serialization + """ + if not isinstance(span, SpanFlat) and not is_opentelemetry_span(span): + raise ValueError( + f"Unknown span type: {type(span).__name__}. " + f"Only SpanFlat and typed spans are supported." 
+ ) + + result = { + "name": span.name, + "span_id": span.span_id, + "parent_id": span.parent_id, + "start_time": span.start_time, + "end_time": span.end_time, + "duration": span.duration, + "span_type": span.__class__.__name__, + } + + if isinstance(span, SpanFlat): + if hasattr(span, "metrics") and span.metrics: + result["metrics"] = span.metrics + + else: # is_opentelemetry_span(span) + result["span_kind"] = span.span_kind + result["attributes"] = span.to_otel_attributes() + + if hasattr(span, "events") and span.events: + result["events"] = [ + { + "name": event.name, + "timestamp": event.timestamp, + "attributes": event.attributes, + } + for event in span.events + ] + + if hasattr(span, "error") and span.error: + result["error"] = { + "occurred": span.error, + "type": getattr(span, "error_type", None), + "message": getattr(span, "error_message", None), + } + + if hasattr(span, "custom_attributes") and span.custom_attributes: + result["custom_attributes"] = span.custom_attributes + + return result + + +def extract_span_attributes(span) -> Dict[str, Any]: + """Extract OpenTelemetry attributes from any span type. + + Args: + span: Either SpanFlat or typed span + + Returns: + Dictionary of OpenTelemetry attributes + """ + if isinstance(span, SpanFlat): + return span.metrics.copy() if hasattr(span, "metrics") and span.metrics else {} + + elif is_opentelemetry_span(span): + return span.to_otel_attributes() + + else: + raise ValueError( + f"Unknown span type: {type(span).__name__}. " + f"Only SpanFlat and typed spans are supported." + ) diff --git a/nemoguardrails/tracing/spans.py b/nemoguardrails/tracing/spans.py index 3a21c3024..c7446625b 100644 --- a/nemoguardrails/tracing/spans.py +++ b/nemoguardrails/tracing/spans.py @@ -13,26 +13,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Final, Literal - -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - """Span models for NeMo Guardrails tracing system.""" from abc import ABC, abstractmethod -from typing import Any, Dict, List, Literal, Optional, Union +from enum import Enum +from typing import Any, Dict, List, Optional, Union from pydantic import BaseModel, Field @@ -43,6 +28,12 @@ ) +class SpanKind(str, Enum): + SERVER = "server" + CLIENT = "client" + INTERNAL = "internal" + + class SpanEvent(BaseModel): """Event that can be attached to a span.""" @@ -88,9 +79,7 @@ class BaseSpan(BaseModel, ABC): end_time: float = Field(description="End time relative to trace start (seconds)") duration: float = Field(description="Duration of the span in seconds") - span_kind: Literal["server", "client", "internal"] = Field( - description="OpenTelemetry span kind" - ) + span_kind: SpanKind = Field(description="OpenTelemetry span kind") events: List[SpanEvent] = Field( default_factory=list, @@ -141,7 +130,7 @@ def _base_attributes(self) -> Dict[str, Any]: class InteractionSpan(BaseSpan): """Top-level span for a guardrails interaction (server span).""" - span_kind: Literal["server"] = "server" + span_kind: SpanKind = SpanKind.SERVER operation_name: str = Field( default="guardrails", description="Operation name for this interaction" @@ -172,8 +161,7 @@ def to_otel_attributes(self) -> Dict[str, Any]: class RailSpan(BaseSpan): """Span for a guardrail execution 
(internal span).""" - span_kind: Literal["internal"] = "internal" - + span_kind: SpanKind = SpanKind.INTERNAL # rail-specific attributes rail_type: str = Field(description="Type of rail (e.g., input, output, dialog)") rail_name: str = Field(description="Name of the rail (e.g., check_jailbreak)") @@ -202,8 +190,7 @@ def to_otel_attributes(self) -> Dict[str, Any]: class ActionSpan(BaseSpan): """Span for an action execution (internal span).""" - span_kind: Literal["internal"] = "internal" - + span_kind: SpanKind = SpanKind.INTERNAL # action-specific attributes action_name: str = Field(description="Name of the action being executed") action_params: Dict[str, Any] = Field( @@ -237,8 +224,7 @@ def to_otel_attributes(self) -> Dict[str, Any]: class LLMSpan(BaseSpan): """Span for an LLM API call (client span).""" - span_kind: Final[Literal["client"]] = "client" - + span_kind: SpanKind = SpanKind.CLIENT provider_name: str = Field( description="LLM provider name (e.g., openai, anthropic)" ) @@ -342,7 +328,7 @@ def to_otel_attributes(self) -> Dict[str, Any]: SpanOpentelemetry = TypedSpan -def is_typed_span(span: Any) -> bool: +def is_opentelemetry_span(span: Any) -> bool: """Check if an object is a typed span (V2). 
Args: From f7d855aae1ee669d43dd78c0fefd958cf17522cd Mon Sep 17 00:00:00 2001 From: Pouyanpi <13303554+Pouyanpi@users.noreply.github.com> Date: Tue, 19 Aug 2025 14:29:36 +0200 Subject: [PATCH 06/10] rename flat to legacy --- nemoguardrails/rails/llm/config.py | 2 +- nemoguardrails/tracing/__init__.py | 4 +- nemoguardrails/tracing/interaction_types.py | 6 +-- nemoguardrails/tracing/span_extractors.py | 26 +++++----- nemoguardrails/tracing/span_format.py | 6 +-- nemoguardrails/tracing/span_formatting.py | 16 +++--- nemoguardrails/tracing/spans.py | 4 +- tests/tracing/adapters/test_filesystem.py | 18 +++---- tests/tracing/adapters/test_opentelemetry.py | 18 +++---- .../tracing/adapters/test_opentelemetry_v2.py | 6 +-- tests/tracing/spans/test_span_extractors.py | 30 +++++------ tests/tracing/spans/test_span_format_enum.py | 52 +++++++++---------- .../spans/test_span_models_and_extractors.py | 12 ++--- .../tracing/spans/test_span_v2_integration.py | 10 ++-- tests/tracing/spans/test_spans.py | 32 ++++++------ tests/tracing/test_span_formatting.py | 24 ++++----- 16 files changed, 133 insertions(+), 133 deletions(-) diff --git a/nemoguardrails/rails/llm/config.py b/nemoguardrails/rails/llm/config.py index 0136fee58..bbb9a18d3 100644 --- a/nemoguardrails/rails/llm/config.py +++ b/nemoguardrails/rails/llm/config.py @@ -366,7 +366,7 @@ class TracingConfig(BaseModel): ) span_format: str = Field( default="opentelemetry", - description="The span format to use. Options are 'flat' (simple metrics) or 'opentelemetry' (OpenTelemetry semantic conventions).", + description="The span format to use. 
Options are 'legacy' (simple metrics) or 'opentelemetry' (OpenTelemetry semantic conventions).", ) enable_content_capture: bool = Field( default=False, diff --git a/nemoguardrails/tracing/__init__.py b/nemoguardrails/tracing/__init__.py index 97eb81885..69492c40d 100644 --- a/nemoguardrails/tracing/__init__.py +++ b/nemoguardrails/tracing/__init__.py @@ -20,7 +20,7 @@ SpanExtractorV2, create_span_extractor, ) -from .spans import SpanEvent, SpanFlat, SpanOpentelemetry +from .spans import SpanEvent, SpanLegacy, SpanOpentelemetry from .tracer import Tracer, create_log_adapters ___all__ = [ @@ -31,6 +31,6 @@ Tracer, create_log_adapters, SpanEvent, - SpanFlat, + SpanLegacy, SpanOpentelemetry, ] diff --git a/nemoguardrails/tracing/interaction_types.py b/nemoguardrails/tracing/interaction_types.py index ca8f658ed..51f77bdbd 100644 --- a/nemoguardrails/tracing/interaction_types.py +++ b/nemoguardrails/tracing/interaction_types.py @@ -21,7 +21,7 @@ from nemoguardrails.rails.llm.options import ActivatedRail, GenerationLog from nemoguardrails.tracing.span_extractors import SpanExtractor, create_span_extractor -from nemoguardrails.tracing.spans import SpanFlat, SpanOpentelemetry +from nemoguardrails.tracing.spans import SpanLegacy, SpanOpentelemetry class InteractionLog(BaseModel): @@ -36,7 +36,7 @@ class InteractionLog(BaseModel): default_factory=list, description="The full list of events recorded during the interaction.", ) - trace: List[Union[SpanFlat, SpanOpentelemetry]] = Field( + trace: List[Union[SpanLegacy, SpanOpentelemetry]] = Field( default_factory=list, description="Detailed information about the execution." 
) @@ -62,7 +62,7 @@ def extract_interaction_log( Args: interaction_output: The interaction output generation_log: The generation log - span_format: Span format to use ("flat" or "opentelemetry") + span_format: Span format to use ("legacy" or "opentelemetry") enable_content_capture: Whether to include content in trace events """ internal_events = generation_log.internal_events diff --git a/nemoguardrails/tracing/span_extractors.py b/nemoguardrails/tracing/span_extractors.py index 3d31b9229..637f754f9 100644 --- a/nemoguardrails/tracing/span_extractors.py +++ b/nemoguardrails/tracing/span_extractors.py @@ -34,7 +34,7 @@ LLMSpan, RailSpan, SpanEvent, - SpanFlat, + SpanLegacy, SpanOpentelemetry, TypedSpan, ) @@ -47,7 +47,7 @@ class SpanExtractor(ABC): @abstractmethod def extract_spans( self, activated_rails: List[ActivatedRail] - ) -> List[Union[SpanFlat, SpanOpentelemetry]]: + ) -> List[Union[SpanLegacy, SpanOpentelemetry]]: """Extract spans from activated rails.""" ... @@ -57,16 +57,16 @@ class SpanExtractorV1(SpanExtractor): def extract_spans( self, activated_rails: List[ActivatedRail] - ) -> List[Union[SpanFlat, SpanOpentelemetry]]: + ) -> List[Union[SpanLegacy, SpanOpentelemetry]]: """Extract v1 spans from activated rails.""" - spans: List[SpanFlat] = [] + spans: List[SpanLegacy] = [] if not activated_rails: return spans ref_time = activated_rails[0].started_at or 0.0 # Create interaction span - interaction_span = SpanFlat( + interaction_span = SpanLegacy( span_id=new_uuid(), name=SpanTypes.INTERACTION, # V1 uses legacy naming start_time=(activated_rails[0].started_at or 0.0) - ref_time, @@ -86,7 +86,7 @@ def extract_spans( # Process rails and actions for activated_rail in activated_rails: - rail_span = SpanFlat( + rail_span = SpanLegacy( span_id=new_uuid(), name="rail: " + activated_rail.name, parent_id=interaction_span.span_id, @@ -97,7 +97,7 @@ def extract_spans( spans.append(rail_span) for action in activated_rail.executed_actions: - action_span = SpanFlat( + 
action_span = SpanLegacy( span_id=new_uuid(), name="action: " + action.action_name, parent_id=rail_span.span_id, @@ -119,7 +119,7 @@ def extract_spans( # Process LLM calls for llm_call in action.llm_calls: model_name = llm_call.llm_model_name or SystemConstants.UNKNOWN - llm_span = SpanFlat( + llm_span = SpanLegacy( span_id=new_uuid(), name="LLM: " + model_name, parent_id=action_span.span_id, @@ -165,7 +165,7 @@ def __init__( def extract_spans( self, activated_rails: List[ActivatedRail] - ) -> List[Union[SpanFlat, SpanOpentelemetry, TypedSpan]]: + ) -> List[Union[SpanLegacy, SpanOpentelemetry, TypedSpan]]: """Extract v2 spans from activated rails with OpenTelemetry attributes.""" spans: List[TypedSpan] = [] ref_time = activated_rails[0].started_at or 0.0 @@ -451,14 +451,14 @@ def _extract_finish_reasons(self, raw_response: dict) -> Optional[List[str]]: def create_span_extractor( - span_format: str = "flat", + span_format: str = "legacy", events: Optional[List[dict]] = None, enable_content_capture: bool = True, ) -> SpanExtractor: """Create a span extractor based on format and configuration. 
Args: - span_format: Format of span extractor ('flat' or 'opentelemetry') + span_format: Format of span extractor ('legacy' or 'opentelemetry') events: Internal events for OpenTelemetry extractor enable_content_capture: Whether to capture content in spans @@ -470,8 +470,8 @@ def create_span_extractor( """ format_enum = validate_span_format(span_format) - if format_enum == SpanFormat.FLAT: - return SpanExtractorV1() # TODO: Rename to SpanExtractorFlat + if format_enum == SpanFormat.LEGACY: + return SpanExtractorV1() # TODO: Rename to SpanExtractorLegacy elif format_enum == SpanFormat.OPENTELEMETRY: return SpanExtractorV2( # TODO: Rename to SpanExtractorOTel events=events, diff --git a/nemoguardrails/tracing/span_format.py b/nemoguardrails/tracing/span_format.py index 56205073a..d524c127a 100644 --- a/nemoguardrails/tracing/span_format.py +++ b/nemoguardrails/tracing/span_format.py @@ -25,8 +25,8 @@ class SpanFormat(str, Enum): Inherits from str to allow direct string comparison and JSON serialization. 
""" - # flat structure with metrics dictionary (simple, minimal overhead) - FLAT = "flat" + # legacy structure with metrics dictionary (simple, minimal overhead) + LEGACY = "legacy" # OpenTelemetry Semantic Conventions compliant format # see https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-events/ @@ -60,7 +60,7 @@ def __str__(self) -> str: # Type alias for function signatures -SpanFormatType = Union[SpanFormat, Literal["flat", "opentelemetry"], str] +SpanFormatType = Union[SpanFormat, Literal["legacy", "opentelemetry"], str] def validate_span_format(value: SpanFormatType) -> SpanFormat: diff --git a/nemoguardrails/tracing/span_formatting.py b/nemoguardrails/tracing/span_formatting.py index c3f814e8a..96470d7f8 100644 --- a/nemoguardrails/tracing/span_formatting.py +++ b/nemoguardrails/tracing/span_formatting.py @@ -17,22 +17,22 @@ from typing import Any, Dict -from nemoguardrails.tracing.spans import SpanFlat, is_opentelemetry_span +from nemoguardrails.tracing.spans import SpanLegacy, is_opentelemetry_span def format_span_for_filesystem(span) -> Dict[str, Any]: """Format any span type for JSON filesystem storage. Args: - span: Either SpanFlat or typed span (InteractionSpan, RailSpan, etc.) + span: Either SpanLegacy or typed span (InteractionSpan, RailSpan, etc.) Returns: Dictionary with all span data for JSON serialization """ - if not isinstance(span, SpanFlat) and not is_opentelemetry_span(span): + if not isinstance(span, SpanLegacy) and not is_opentelemetry_span(span): raise ValueError( f"Unknown span type: {type(span).__name__}. " - f"Only SpanFlat and typed spans are supported." + f"Only SpanLegacy and typed spans are supported." 
+ ) result = { @@ -45,7 +45,7 @@ def format_span_for_filesystem(span) -> Dict[str, Any]: "span_type": span.__class__.__name__, } - if isinstance(span, SpanFlat): + if isinstance(span, SpanLegacy): if hasattr(span, "metrics") and span.metrics: result["metrics"] = span.metrics @@ -80,12 +80,12 @@ def extract_span_attributes(span) -> Dict[str, Any]: """Extract OpenTelemetry attributes from any span type. Args: - span: Either SpanFlat or typed span + span: Either SpanLegacy or typed span Returns: Dictionary of OpenTelemetry attributes """ - if isinstance(span, SpanFlat): + if isinstance(span, SpanLegacy): return span.metrics.copy() if hasattr(span, "metrics") and span.metrics else {} elif is_opentelemetry_span(span): @@ -94,5 +94,5 @@ def extract_span_attributes(span) -> Dict[str, Any]: else: raise ValueError( f"Unknown span type: {type(span).__name__}. " - f"Only SpanFlat and typed spans are supported." + f"Only SpanLegacy and typed spans are supported." ) diff --git a/nemoguardrails/tracing/spans.py b/nemoguardrails/tracing/spans.py index c7446625b..fb89fb394 100644 --- a/nemoguardrails/tracing/spans.py +++ b/nemoguardrails/tracing/spans.py @@ -47,7 +47,7 @@ class SpanEvent(BaseModel): ) -class SpanFlat(BaseModel): +class SpanLegacy(BaseModel): """Simple span model (v1) for basic tracing.""" span_id: str = Field(description="The id of the span.") @@ -101,7 +101,7 @@ class BaseSpan(BaseModel, ABC): @abstractmethod def to_otel_attributes(self) -> Dict[str, Any]: - """Convert typed fields to flat OpenTelemetry attributes dictionary. + """Convert typed fields to a flat OpenTelemetry attributes dictionary. Returns: Dict containing OTel semantic convention attributes. 
diff --git a/tests/tracing/adapters/test_filesystem.py b/tests/tracing/adapters/test_filesystem.py index d79b14c4d..97c77e618 100644 --- a/tests/tracing/adapters/test_filesystem.py +++ b/tests/tracing/adapters/test_filesystem.py @@ -20,7 +20,7 @@ import tempfile import unittest -from nemoguardrails.tracing import InteractionLog, SpanFlat +from nemoguardrails.tracing import InteractionLog, SpanLegacy from nemoguardrails.tracing.adapters.filesystem import FileSystemAdapter from nemoguardrails.tracing.spans import ( ActionSpan, @@ -58,7 +58,7 @@ def test_transform(self): activated_rails=[], events=[], trace=[ - SpanFlat( + SpanLegacy( name="test_span", span_id="span_1", parent_id=None, @@ -92,7 +92,7 @@ async def run_test(): activated_rails=[], events=[], trace=[ - SpanFlat( + SpanLegacy( name="test_span", span_id="span_1", parent_id=None, @@ -122,7 +122,7 @@ def test_schema_version(self): activated_rails=[], events=[], trace=[ - SpanFlat( + SpanLegacy( name="test_span", span_id="span_1", parent_id=None, @@ -140,14 +140,14 @@ def test_schema_version(self): log_dict = json.loads(content.strip()) self.assertEqual(log_dict["schema_version"], "2.0") - def test_span_flat_with_metrics(self): + def test_span_legacy_with_metrics(self): adapter = FileSystemAdapter(filepath=self.filepath) interaction_log = InteractionLog( id="test_trace", activated_rails=[], events=[], trace=[ - SpanFlat( + SpanLegacy( name="llm_call", span_id="span_1", parent_id=None, @@ -168,7 +168,7 @@ def test_span_flat_with_metrics(self): content = f.read() log_dict = json.loads(content.strip()) span = log_dict["spans"][0] - self.assertEqual(span["span_type"], "SpanFlat") + self.assertEqual(span["span_type"], "SpanLegacy") self.assertIn("metrics", span) self.assertEqual(span["metrics"]["input_tokens"], 10) self.assertEqual(span["metrics"]["output_tokens"], 20) @@ -367,7 +367,7 @@ def test_mixed_span_types(self): rail_name="check_jailbreak", rail_stop=False, ), - SpanFlat( + SpanLegacy( name="legacy_span", 
span_id="span_3", parent_id="span_1", @@ -392,7 +392,7 @@ def test_mixed_span_types(self): self.assertEqual(log_dict["spans"][1]["span_type"], "RailSpan") self.assertEqual(log_dict["spans"][1]["parent_id"], "span_1") - self.assertEqual(log_dict["spans"][2]["span_type"], "SpanFlat") + self.assertEqual(log_dict["spans"][2]["span_type"], "SpanLegacy") self.assertIn("metrics", log_dict["spans"][2]) self.assertNotIn("span_kind", log_dict["spans"][2]) diff --git a/tests/tracing/adapters/test_opentelemetry.py b/tests/tracing/adapters/test_opentelemetry.py index 455d9c213..f6c1405dc 100644 --- a/tests/tracing/adapters/test_opentelemetry.py +++ b/tests/tracing/adapters/test_opentelemetry.py @@ -25,7 +25,7 @@ from nemoguardrails.tracing import ( InteractionLog, SpanEvent, - SpanFlat, + SpanLegacy, SpanOpentelemetry, ) from nemoguardrails.tracing.adapters.opentelemetry import OpenTelemetryAdapter @@ -77,7 +77,7 @@ def test_transform(self): activated_rails=[], events=[], trace=[ - SpanFlat( + SpanLegacy( name="test_span", span_id="span_1", parent_id=None, @@ -129,7 +129,7 @@ def test_transform_span_attributes_various_types(self): activated_rails=[], events=[], trace=[ - SpanFlat( + SpanLegacy( name="test_span", span_id="span_1", parent_id=None, @@ -182,7 +182,7 @@ def test_transform_with_tracer_failure(self): activated_rails=[], events=[], trace=[ - SpanFlat( + SpanLegacy( name="test_span", span_id="span_1", parent_id=None, @@ -210,7 +210,7 @@ def test_transform_with_parent_child_relationships(self): activated_rails=[], events=[], trace=[ - SpanFlat( + SpanLegacy( name="parent_span", span_id="span_1", parent_id=None, @@ -219,7 +219,7 @@ def test_transform_with_parent_child_relationships(self): duration=2.0, metrics={"parent_key": 1}, ), - SpanFlat( + SpanLegacy( name="child_span", span_id="span_2", parent_id="span_1", @@ -288,7 +288,7 @@ async def run_test(): activated_rails=[], events=[], trace=[ - SpanFlat( + SpanLegacy( name="test_span", span_id="span_1", parent_id=None, @@ 
-345,7 +345,7 @@ async def run_test(): activated_rails=[], events=[], trace=[ - SpanFlat( + SpanLegacy( name="test_span", span_id="span_1", parent_id=None, @@ -412,7 +412,7 @@ def track_span(*args, **kwargs): spans = [] for i in range(5): spans.append( - SpanFlat( + SpanLegacy( name=f"v1_span_{i}", span_id=str(i), start_time=float(i * 0.1), # 0, 0.1, 0.2, 0.3, 0.4 diff --git a/tests/tracing/adapters/test_opentelemetry_v2.py b/tests/tracing/adapters/test_opentelemetry_v2.py index ea190f42b..fae39b129 100644 --- a/tests/tracing/adapters/test_opentelemetry_v2.py +++ b/tests/tracing/adapters/test_opentelemetry_v2.py @@ -19,7 +19,7 @@ from nemoguardrails.tracing import ( InteractionLog, SpanEvent, - SpanFlat, + SpanLegacy, SpanOpentelemetry, ) from nemoguardrails.tracing.adapters.opentelemetry import OpenTelemetryAdapter @@ -49,7 +49,7 @@ def test_v1_span_compatibility(self): mock_span = MagicMock() self.mock_tracer.start_span.return_value = mock_span - v1_span = SpanFlat( + v1_span = SpanLegacy( name="test_v1", span_id="v1_123", start_time=0.0, @@ -208,7 +208,7 @@ def test_mixed_v1_v2_spans(self): mock_span = MagicMock() self.mock_tracer.start_span.return_value = mock_span - v1_span = SpanFlat( + v1_span = SpanLegacy( name="action: check_input", span_id="v1_span", start_time=0.0, diff --git a/tests/tracing/spans/test_span_extractors.py b/tests/tracing/spans/test_span_extractors.py index e98546a64..9c9c85c05 100644 --- a/tests/tracing/spans/test_span_extractors.py +++ b/tests/tracing/spans/test_span_extractors.py @@ -22,14 +22,14 @@ from nemoguardrails.tracing import ( SpanExtractorV1, SpanExtractorV2, - SpanFlat, + SpanLegacy, create_span_extractor, ) from nemoguardrails.tracing.spans import LLMSpan, is_opentelemetry_span class TestSpanExtractors: - """Test span extraction for flat and OpenTelemetry formats.""" + """Test span extraction for legacy and OpenTelemetry formats.""" @pytest.fixture def test_data(self): @@ -70,16 +70,16 @@ def test_data(self): return [rail] - 
def test_span_extractor_flat_format(self, test_data): - """Test flat format span extractor produces flat spans.""" + def test_span_extractor_legacy_format(self, test_data): + """Test legacy format span extractor produces legacy spans.""" extractor = SpanExtractorV1() spans = extractor.extract_spans(test_data) assert len(spans) > 0 - # All spans should be flat format + # All spans should be legacy format for span in spans: - assert isinstance(span, SpanFlat) + assert isinstance(span, SpanLegacy) assert not hasattr(span, "attributes") span_names = [s.name for s in spans] @@ -185,9 +185,9 @@ def test_span_extractor_conversation_events(self, test_data): class TestSpanFormatConfiguration: """Test span format configuration and factory.""" - def test_create_span_extractor_flat(self): - """Test creating flat format span extractor.""" - extractor = create_span_extractor(span_format="flat") + def test_create_span_extractor_legacy(self): + """Test creating legacy format span extractor.""" + extractor = create_span_extractor(span_format="legacy") assert isinstance(extractor, SpanExtractorV1) def test_create_span_extractor_opentelemetry(self): @@ -211,11 +211,11 @@ def test_opentelemetry_extractor_with_events(self): assert isinstance(extractor, SpanExtractorV2) assert extractor.internal_events == events - def test_flat_extractor_ignores_extra_params(self): - """Test flat extractor ignores OpenTelemetry-specific parameters.""" - # Flat extractor should ignore events and enable_content_capture + def test_legacy_extractor_ignores_extra_params(self): + """Test legacy extractor ignores OpenTelemetry-specific parameters.""" + # Legacy extractor should ignore events and enable_content_capture extractor = create_span_extractor( - span_format="flat", events=[{"type": "test"}], enable_content_capture=True + span_format="legacy", events=[{"type": "test"}], enable_content_capture=True ) assert isinstance(extractor, SpanExtractorV1) @@ -226,8 +226,8 @@ def 
test_flat_extractor_ignores_extra_params(self): @pytest.mark.parametrize( "format_str,expected_class", [ - ("flat", SpanExtractorV1), - ("FLAT", SpanExtractorV1), + ("legacy", SpanExtractorV1), + ("LEGACY", SpanExtractorV1), ("opentelemetry", SpanExtractorV2), ("OPENTELEMETRY", SpanExtractorV2), ("OpenTelemetry", SpanExtractorV2), diff --git a/tests/tracing/spans/test_span_format_enum.py b/tests/tracing/spans/test_span_format_enum.py index 32b19b57b..174bbd9fb 100644 --- a/tests/tracing/spans/test_span_format_enum.py +++ b/tests/tracing/spans/test_span_format_enum.py @@ -30,40 +30,40 @@ class TestSpanFormat: def test_enum_values(self): """Test that enum has expected values.""" - assert SpanFormat.FLAT.value == "flat" + assert SpanFormat.LEGACY.value == "legacy" assert SpanFormat.OPENTELEMETRY.value == "opentelemetry" def test_string_inheritance(self): """Test that SpanFormat inherits from str.""" - assert isinstance(SpanFormat.FLAT, str) + assert isinstance(SpanFormat.LEGACY, str) assert isinstance(SpanFormat.OPENTELEMETRY, str) def test_string_comparison(self): """Test direct string comparison works.""" - assert SpanFormat.FLAT == "flat" + assert SpanFormat.LEGACY == "legacy" assert SpanFormat.OPENTELEMETRY == "opentelemetry" - assert SpanFormat.FLAT != "opentelemetry" + assert SpanFormat.LEGACY != "opentelemetry" def test_json_serialization(self): """Test that enum values can be JSON serialized.""" - data = {"format": SpanFormat.FLAT} + data = {"format": SpanFormat.LEGACY} json_str = json.dumps(data) - assert '"format": "flat"' in json_str + assert '"format": "legacy"' in json_str parsed = json.loads(json_str) - assert parsed["format"] == "flat" + assert parsed["format"] == "legacy" def test_str_method(self): """Test __str__ method returns value.""" - assert str(SpanFormat.FLAT) == "flat" + assert str(SpanFormat.LEGACY) == "legacy" assert str(SpanFormat.OPENTELEMETRY) == "opentelemetry" def test_from_string_valid_values(self): """Test from_string with valid 
values.""" - assert SpanFormat.from_string("flat") == SpanFormat.FLAT + assert SpanFormat.from_string("legacy") == SpanFormat.LEGACY assert SpanFormat.from_string("opentelemetry") == SpanFormat.OPENTELEMETRY - assert SpanFormat.from_string("FLAT") == SpanFormat.FLAT + assert SpanFormat.from_string("LEGACY") == SpanFormat.LEGACY assert SpanFormat.from_string("OpenTelemetry") == SpanFormat.OPENTELEMETRY assert SpanFormat.from_string("OPENTELEMETRY") == SpanFormat.OPENTELEMETRY @@ -74,7 +74,7 @@ def test_from_string_invalid_value(self): error_msg = str(exc_info.value) assert "Invalid span format: 'invalid'" in error_msg - assert "Valid formats are: flat, opentelemetry" in error_msg + assert "Valid formats are: legacy, opentelemetry" in error_msg def test_from_string_empty_value(self): """Test from_string with empty string raises ValueError.""" @@ -92,8 +92,8 @@ class TestValidateSpanFormat: def test_validate_span_format_enum(self): """Test validation with SpanFormat enum.""" - result = validate_span_format(SpanFormat.FLAT) - assert result == SpanFormat.FLAT + result = validate_span_format(SpanFormat.LEGACY) + assert result == SpanFormat.LEGACY assert isinstance(result, SpanFormat) result = validate_span_format(SpanFormat.OPENTELEMETRY) @@ -102,16 +102,16 @@ def test_validate_span_format_enum(self): def test_validate_span_format_string(self): """Test validation with string values.""" - result = validate_span_format("flat") - assert result == SpanFormat.FLAT + result = validate_span_format("legacy") + assert result == SpanFormat.LEGACY assert isinstance(result, SpanFormat) result = validate_span_format("opentelemetry") assert result == SpanFormat.OPENTELEMETRY assert isinstance(result, SpanFormat) - result = validate_span_format("FLAT") - assert result == SpanFormat.FLAT + result = validate_span_format("LEGACY") + assert result == SpanFormat.LEGACY def test_validate_span_format_invalid_string(self): """Test validation with invalid string raises ValueError.""" @@ -138,12 
+138,12 @@ def test_validate_span_format_none(self): def test_validate_span_format_list(self): """Test validation with list raises TypeError.""" with pytest.raises(TypeError): - validate_span_format(["flat"]) + validate_span_format(["legacy"]) def test_validate_span_format_dict(self): """Test validation with dict raises TypeError.""" with pytest.raises(TypeError): - validate_span_format({"format": "flat"}) + validate_span_format({"format": "legacy"}) class TestSpanFormatType: @@ -155,8 +155,8 @@ def test_type_alias_accepts_enum(self): def test_function(format_type: SpanFormatType) -> SpanFormat: return validate_span_format(format_type) - result = test_function(SpanFormat.FLAT) - assert result == SpanFormat.FLAT + result = test_function(SpanFormat.LEGACY) + assert result == SpanFormat.LEGACY def test_type_alias_accepts_string(self): """Test that type alias accepts string values.""" @@ -164,8 +164,8 @@ def test_type_alias_accepts_string(self): def test_function(format_type: SpanFormatType) -> SpanFormat: return validate_span_format(format_type) - result = test_function("flat") - assert result == SpanFormat.FLAT + result = test_function("legacy") + assert result == SpanFormat.LEGACY result = test_function("opentelemetry") assert result == SpanFormat.OPENTELEMETRY @@ -187,12 +187,12 @@ def test_config_usage_pattern(self): def test_function_parameter_pattern(self): """Test typical function parameter usage pattern.""" - def process_spans(span_format: SpanFormatType = SpanFormat.FLAT): + def process_spans(span_format: SpanFormatType = SpanFormat.LEGACY): validated_format = validate_span_format(span_format) return validated_format result = process_spans() - assert result == SpanFormat.FLAT + assert result == SpanFormat.LEGACY result = process_spans("opentelemetry") assert result == SpanFormat.OPENTELEMETRY @@ -202,7 +202,7 @@ def process_spans(span_format: SpanFormatType = SpanFormat.FLAT): def test_all_enum_values_have_tests(self): """Ensure all enum values are tested.""" 
- tested_values = {"flat", "opentelemetry"} + tested_values = {"legacy", "opentelemetry"} actual_values = {format_enum.value for format_enum in SpanFormat} assert ( tested_values == actual_values diff --git a/tests/tracing/spans/test_span_models_and_extractors.py b/tests/tracing/spans/test_span_models_and_extractors.py index 77ddc7aa7..ed6bebec3 100644 --- a/tests/tracing/spans/test_span_models_and_extractors.py +++ b/tests/tracing/spans/test_span_models_and_extractors.py @@ -23,7 +23,7 @@ SpanEvent, SpanExtractorV1, SpanExtractorV2, - SpanFlat, + SpanLegacy, SpanOpentelemetry, create_span_extractor, ) @@ -32,7 +32,7 @@ class TestSpanModels: def test_span_v1_creation(self): - span = SpanFlat( + span = SpanLegacy( span_id="test-123", name="test span", start_time=0.0, @@ -87,7 +87,7 @@ def test_span_v2_creation(self): assert attributes["gen_ai.provider.name"] == "openai" assert attributes["gen_ai.request.model"] == "gpt-4" - assert not isinstance(span, SpanFlat) + assert not isinstance(span, SpanLegacy) # Python 3.9 compatibility: cannot use isinstance with Union types # SpanOpentelemetry is TypedSpan which is a Union, so check the actual type assert isinstance(span, LLMSpan) @@ -149,7 +149,7 @@ def test_span_extractor_v1(self, test_data): assert len(spans) > 0 for span in spans: - assert isinstance(span, SpanFlat) + assert isinstance(span, SpanLegacy) assert not hasattr(span, "attributes") span_names = [s.name for s in spans] @@ -251,8 +251,8 @@ def test_span_extractor_v2_conversation_events(self, test_data): class TestSpanVersionConfiguration: - def test_create_span_extractor_flat(self): - extractor = create_span_extractor(span_format="flat") + def test_create_span_extractor_legacy(self): + extractor = create_span_extractor(span_format="legacy") assert isinstance(extractor, SpanExtractorV1) def test_create_span_extractor_opentelemetry(self): diff --git a/tests/tracing/spans/test_span_v2_integration.py b/tests/tracing/spans/test_span_v2_integration.py index 
f217e3878..e82becc91 100644 --- a/tests/tracing/spans/test_span_v2_integration.py +++ b/tests/tracing/spans/test_span_v2_integration.py @@ -50,7 +50,7 @@ def v1_config(): tracing: enabled: true - span_format: flat + span_format: legacy adapters: [] """ ) @@ -136,12 +136,12 @@ async def test_v2_spans_generated_with_events(v2_config): def test_v1_backward_compatibility(v1_config): - assert v1_config.tracing.span_format == "flat" + assert v1_config.tracing.span_format == "legacy" llm = FakeLLM(responses=["Hello!"]) _rails = LLMRails(config=v1_config, llm=llm) - extractor = create_span_extractor(span_format="flat") + extractor = create_span_extractor(span_format="legacy") assert extractor.__class__.__name__ == "SpanExtractorV1" @@ -150,8 +150,8 @@ def test_default_span_format(default_config): def test_span_format_configuration_direct(): - extractor_flat = create_span_extractor(span_format="flat") - assert extractor_flat.__class__.__name__ == "SpanExtractorV1" + extractor_legacy = create_span_extractor(span_format="legacy") + assert extractor_legacy.__class__.__name__ == "SpanExtractorV1" extractor_otel = create_span_extractor(span_format="opentelemetry") assert extractor_otel.__class__.__name__ == "SpanExtractorV2" diff --git a/tests/tracing/spans/test_spans.py b/tests/tracing/spans/test_spans.py index 75a56862e..2cf218bc0 100644 --- a/tests/tracing/spans/test_spans.py +++ b/tests/tracing/spans/test_spans.py @@ -16,16 +16,16 @@ import pytest -from nemoguardrails.tracing import SpanEvent, SpanFlat +from nemoguardrails.tracing import SpanEvent, SpanLegacy from nemoguardrails.tracing.spans import LLMSpan, is_opentelemetry_span class TestSpanModels: - """Test the span models for flat and OpenTelemetry formats.""" + """Test the span models for legacy and OpenTelemetry formats.""" - def test_span_flat_creation(self): - """Test creating a flat format span.""" - span = SpanFlat( + def test_span_legacy_creation(self): + """Test creating a legacy format span.""" + span = 
SpanLegacy( span_id="test-123", name="test span", start_time=0.0, @@ -39,7 +39,7 @@ def test_span_flat_creation(self): assert span.duration == 1.0 assert span.metrics["test_metric"] == 42 - # Flat spans don't have OpenTelemetry attributes + # Legacy spans don't have OpenTelemetry attributes assert not hasattr(span, "attributes") assert not hasattr(span, "events") assert not hasattr(span, "otel_metrics") @@ -78,10 +78,10 @@ def test_span_opentelemetry_creation(self): assert attributes["gen_ai.provider.name"] == "openai" assert attributes["gen_ai.request.model"] == "gpt-4" - def test_span_flat_model_is_simple(self): - """Test that Flat span model is a simple span without OpenTelemetry features.""" - flat_span = SpanFlat( - span_id="flat-123", + def test_span_legacy_model_is_simple(self): + """Test that Legacy span model is a simple span without OpenTelemetry features.""" + legacy_span = SpanLegacy( + span_id="legacy-123", name="test", start_time=0.0, end_time=1.0, @@ -89,10 +89,10 @@ def test_span_flat_model_is_simple(self): metrics={"metric": 1}, ) - assert isinstance(flat_span, SpanFlat) - assert flat_span.span_id == "flat-123" - assert flat_span.metrics["metric"] == 1 + assert isinstance(legacy_span, SpanLegacy) + assert legacy_span.span_id == "legacy-123" + assert legacy_span.metrics["metric"] == 1 - # Flat spans don't have OpenTelemetry attributes or events - assert not hasattr(flat_span, "attributes") - assert not hasattr(flat_span, "events") + # Legacy spans don't have OpenTelemetry attributes or events + assert not hasattr(legacy_span, "attributes") + assert not hasattr(legacy_span, "events") diff --git a/tests/tracing/test_span_formatting.py b/tests/tracing/test_span_formatting.py index bea842b00..2e8cbff1d 100644 --- a/tests/tracing/test_span_formatting.py +++ b/tests/tracing/test_span_formatting.py @@ -25,13 +25,13 @@ LLMSpan, RailSpan, SpanEvent, - SpanFlat, + SpanLegacy, ) class TestFormatSpanForFilesystem: - def test_format_flat_span_with_metrics(self): 
- span = SpanFlat( + def test_format_legacy_span_with_metrics(self): + span = SpanLegacy( name="llm_call", span_id="span_1", parent_id="parent_1", @@ -49,13 +49,13 @@ def test_format_flat_span_with_metrics(self): assert result["start_time"] == 0.5 assert result["end_time"] == 1.5 assert result["duration"] == 1.0 - assert result["span_type"] == "SpanFlat" + assert result["span_type"] == "SpanLegacy" assert result["metrics"] == {"input_tokens": 10, "output_tokens": 20} assert "span_kind" not in result assert "attributes" not in result - def test_format_flat_span_without_metrics(self): - span = SpanFlat( + def test_format_legacy_span_without_metrics(self): + span = SpanLegacy( name="test", span_id="span_1", parent_id=None, @@ -67,7 +67,7 @@ def test_format_flat_span_without_metrics(self): result = format_span_for_filesystem(span) - assert result["span_type"] == "SpanFlat" + assert result["span_type"] == "SpanLegacy" assert "metrics" not in result def test_format_interaction_span(self): @@ -168,12 +168,12 @@ def __init__(self): format_span_for_filesystem(UnknownSpan()) assert "Unknown span type: UnknownSpan" in str(exc_info.value) - assert "Only SpanFlat and typed spans are supported" in str(exc_info.value) + assert "Only SpanLegacy and typed spans are supported" in str(exc_info.value) class TestExtractSpanAttributes: - def test_extract_from_flat_span_with_metrics(self): - span = SpanFlat( + def test_extract_from_legacy_span_with_metrics(self): + span = SpanLegacy( name="test", span_id="1", parent_id=None, @@ -188,8 +188,8 @@ def test_extract_from_flat_span_with_metrics(self): assert attrs == {"tokens": 100, "latency": 0.5} assert attrs is not span.metrics - def test_extract_from_flat_span_without_metrics(self): - span = SpanFlat( + def test_extract_from_legacy_span_without_metrics(self): + span = SpanLegacy( name="test", span_id="1", parent_id=None, From 6c1034062cb782ce98588f3ea02afb3d4ae4e0ef Mon Sep 17 00:00:00 2001 From: Pouyanpi 
<13303554+Pouyanpi@users.noreply.github.com> Date: Tue, 19 Aug 2025 14:49:01 +0200 Subject: [PATCH 07/10] dynamic schema version for filesystem --- nemoguardrails/tracing/adapters/filesystem.py | 32 ++++++++++++------- nemoguardrails/tracing/span_formatting.py | 9 ++++++ tests/tracing/adapters/test_filesystem.py | 2 +- 3 files changed, 30 insertions(+), 13 deletions(-) diff --git a/nemoguardrails/tracing/adapters/filesystem.py b/nemoguardrails/tracing/adapters/filesystem.py index 5ffb61ffa..bd6c967e1 100644 --- a/nemoguardrails/tracing/adapters/filesystem.py +++ b/nemoguardrails/tracing/adapters/filesystem.py @@ -24,12 +24,14 @@ from nemoguardrails.tracing import InteractionLog from nemoguardrails.tracing.adapters.base import InteractionLogAdapter -from nemoguardrails.tracing.span_formatting import format_span_for_filesystem +from nemoguardrails.tracing.span_formatting import ( + format_span_for_filesystem, + get_schema_version_for_filesystem, +) class FileSystemAdapter(InteractionLogAdapter): name = "FileSystem" - SCHEMA_VERSION = "2.0" def __init__(self, filepath: Optional[str] = None): if not filepath: @@ -40,14 +42,17 @@ def __init__(self, filepath: Optional[str] = None): def transform(self, interaction_log: "InteractionLog"): """Transforms the InteractionLog into a JSON string.""" - spans = [] + spans = [ + format_span_for_filesystem(span_data) for span_data in interaction_log.trace + ] - for span_data in interaction_log.trace: - span_dict = format_span_for_filesystem(span_data) - spans.append(span_dict) + if not interaction_log.trace: + schema_version = None + else: + schema_version = get_schema_version_for_filesystem(interaction_log.trace[0]) log_dict = { - "schema_version": self.SCHEMA_VERSION, + "schema_version": schema_version, "trace_id": interaction_log.id, "spans": spans, } @@ -63,14 +68,17 @@ async def transform_async(self, interaction_log: "InteractionLog"): "aiofiles is required for async file writing. 
Please install it using `pip install aiofiles`" ) - spans = [] + spans = [ + format_span_for_filesystem(span_data) for span_data in interaction_log.trace + ] - for span_data in interaction_log.trace: - span_dict = format_span_for_filesystem(span_data) - spans.append(span_dict) + if not interaction_log.trace: + schema_version = None + else: + schema_version = get_schema_version_for_filesystem(interaction_log.trace[0]) log_dict = { - "schema_version": self.SCHEMA_VERSION, + "schema_version": schema_version, "trace_id": interaction_log.id, "spans": spans, } diff --git a/nemoguardrails/tracing/span_formatting.py b/nemoguardrails/tracing/span_formatting.py index 96470d7f8..1350171ba 100644 --- a/nemoguardrails/tracing/span_formatting.py +++ b/nemoguardrails/tracing/span_formatting.py @@ -20,6 +20,15 @@ from nemoguardrails.tracing.spans import SpanLegacy, is_opentelemetry_span +def get_schema_version_for_filesystem(span) -> str: + """Return the schema version string based on the span type.""" + if isinstance(span, SpanLegacy): + return "1.0" + if is_opentelemetry_span(span): + return "2.0" + raise ValueError(f"Unknown span type: {type(span).__name__}.") + + def format_span_for_filesystem(span) -> Dict[str, Any]: """Format any span type for JSON filesystem storage. 
diff --git a/tests/tracing/adapters/test_filesystem.py b/tests/tracing/adapters/test_filesystem.py index 97c77e618..b0c2d9659 100644 --- a/tests/tracing/adapters/test_filesystem.py +++ b/tests/tracing/adapters/test_filesystem.py @@ -138,7 +138,7 @@ def test_schema_version(self): with open(self.filepath, "r") as f: content = f.read() log_dict = json.loads(content.strip()) - self.assertEqual(log_dict["schema_version"], "2.0") + self.assertEqual(log_dict["schema_version"], "1.0") def test_span_legacy_with_metrics(self): adapter = FileSystemAdapter(filepath=self.filepath) From 12ef0645a62e8232ca3ea83805651309ecc82396 Mon Sep 17 00:00:00 2001 From: Pouyanpi <13303554+Pouyanpi@users.noreply.github.com> Date: Fri, 22 Aug 2025 08:34:34 +0200 Subject: [PATCH 08/10] revert unexpected style changes to utils.py --- nemoguardrails/actions/llm/utils.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/nemoguardrails/actions/llm/utils.py b/nemoguardrails/actions/llm/utils.py index b1163081b..7b80d9d37 100644 --- a/nemoguardrails/actions/llm/utils.py +++ b/nemoguardrails/actions/llm/utils.py @@ -175,15 +175,15 @@ def get_colang_history( history += f'user "{event["text"]}"\n' elif event["type"] == "UserIntent": if include_texts: - history += f" {event['intent']}\n" + history += f' {event["intent"]}\n' else: - history += f"user {event['intent']}\n" + history += f'user {event["intent"]}\n' elif event["type"] == "BotIntent": # If we have instructions, we add them before the bot message. # But we only do that for the last bot message. 
if "instructions" in event and idx == last_bot_intent_idx: history += f"# {event['instructions']}\n" - history += f"bot {event['intent']}\n" + history += f'bot {event["intent"]}\n' elif event["type"] == "StartUtteranceBotAction" and include_texts: history += f' "{event["script"]}"\n' # We skip system actions from this log @@ -352,9 +352,9 @@ def flow_to_colang(flow: Union[dict, Flow]) -> str: if "_type" not in element: raise Exception("bla") if element["_type"] == "UserIntent": - colang_flow += f"user {element['intent_name']}\n" + colang_flow += f'user {element["intent_name"]}\n' elif element["_type"] == "run_action" and element["action_name"] == "utter": - colang_flow += f"bot {element['action_params']['value']}\n" + colang_flow += f'bot {element["action_params"]["value"]}\n' return colang_flow From b7449df7ba3f83fa8438787f8e3d59a2aabb05cb Mon Sep 17 00:00:00 2001 From: Pouyanpi <13303554+Pouyanpi@users.noreply.github.com> Date: Fri, 22 Aug 2025 08:35:32 +0200 Subject: [PATCH 09/10] revert unexpected style changes to explain.py --- nemoguardrails/logging/explain.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemoguardrails/logging/explain.py b/nemoguardrails/logging/explain.py index d9c282d15..edf7825c2 100644 --- a/nemoguardrails/logging/explain.py +++ b/nemoguardrails/logging/explain.py @@ -104,7 +104,7 @@ def print_llm_calls_summary(self): for i in range(len(self.llm_calls)): llm_call = self.llm_calls[i] msg = ( - f"{i + 1}. Task `{llm_call.task}` took {llm_call.duration:.2f} seconds " + f"{i+1}. Task `{llm_call.task}` took {llm_call.duration:.2f} seconds " + ( f"and used {llm_call.total_tokens} tokens." 
if total_tokens From be51f330e4b5c9eb6c8b43fc88bbd91668cf71ac Mon Sep 17 00:00:00 2001 From: Pouyanpi <13303554+Pouyanpi@users.noreply.github.com> Date: Fri, 22 Aug 2025 08:45:37 +0200 Subject: [PATCH 10/10] use Enum for span_format and enforce valid values --- nemoguardrails/rails/llm/config.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/nemoguardrails/rails/llm/config.py b/nemoguardrails/rails/llm/config.py index bbb9a18d3..35b3e18e6 100644 --- a/nemoguardrails/rails/llm/config.py +++ b/nemoguardrails/rails/llm/config.py @@ -358,6 +358,11 @@ class LogAdapterConfig(BaseModel): model_config = ConfigDict(extra="allow") +class SpanFormat(str, Enum): + legacy = "legacy" + opentelemetry = "opentelemetry" + + class TracingConfig(BaseModel): enabled: bool = False adapters: List[LogAdapterConfig] = Field( @@ -365,7 +370,7 @@ class TracingConfig(BaseModel): description="The list of tracing adapters to use. If not specified, the default adapters are used.", ) span_format: str = Field( - default="opentelemetry", + default=SpanFormat.opentelemetry, description="The span format to use. Options are 'legacy' (simple metrics) or 'opentelemetry' (OpenTelemetry semantic conventions).", ) enable_content_capture: bool = Field(