
Commit 6429125

Add Open Telemetry instrumentation.
This commit instruments various operations with Open Telemetry spans that abide by the (still nascent) semantic conventions for Generative AI clients [0]. These conventions classify `ellmer` chatbots as "agents" due to their ability to run tool calls, so in fact there are three types of span:

(1) a top-level `invoke_agent` span for each chat interaction;
(2) `chat` spans that wrap model API calls; and
(3) `execute_tool` spans that wrap tool calls on our end.

There's currently no community consensus on how to attach turns to spans, so I've left that out for now.

Example code:

    library(otelsdk)
    Sys.setenv(OTEL_TRACES_EXPORTER = "stderr")
    chat <- ellmer::chat_databricks(model = "databricks-claude-3-7-sonnet")
    chat$chat("Tell me a joke in the form of an SQL query.")

Unit tests are included.

[0]: https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/

Signed-off-by: Aaron Jacobs <[email protected]>
1 parent 1964c37 commit 6429125
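
The example in the commit message exercises only the `invoke_agent` and `chat` spans. A hedged extension (not part of the commit), assuming ellmer's `tool()` and `register_tool()` API, that should also produce nested `execute_tool` spans:

    library(otelsdk)
    Sys.setenv(OTEL_TRACES_EXPORTER = "stderr")

    chat <- ellmer::chat_databricks(model = "databricks-claude-3-7-sonnet")
    # Register a zero-argument tool; invoking it should emit an execute_tool
    # span as a child of the top-level invoke_agent span.
    chat$register_tool(ellmer::tool(
      function() format(Sys.time(), tz = "UTC"),
      "Returns the current time in UTC."
    ))
    chat$chat("What time is it right now in UTC?")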

5 files changed: +383 −1 lines changed


DESCRIPTION

Lines changed: 6 additions & 0 deletions
@@ -40,6 +40,8 @@ Suggests:
     knitr,
     magick,
     openssl,
+    otel (>= 0.0.0.9000),
+    otelsdk (>= 0.0.0.9000),
     paws.common,
     rmarkdown,
     shiny,
@@ -78,6 +80,7 @@ Collate:
     'import-standalone-purrr.R'
     'import-standalone-types-check.R'
     'interpolate.R'
+    'otel.R'
     'params.R'
     'provider-openai.R'
     'provider-azure.R'
@@ -108,3 +111,6 @@ Collate:
     'utils-prettytime.R'
     'utils.R'
     'zzz.R'
+Remotes:
+    r-lib/otel,
+    r-lib/otelsdk
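
Since otel and otelsdk are listed only under Suggests and are installed from GitHub via the Remotes entries above, a hypothetical local-development setup (not part of the commit), assuming the pak package, would be:

    # Install the suggested OpenTelemetry packages from their GitHub remotes
    # for local development and testing.
    install.packages("pak")
    pak::pak(c("r-lib/otel", "r-lib/otelsdk"))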

R/chat.R

Lines changed: 10 additions & 1 deletion
@@ -492,6 +492,7 @@ Chat <- R6::R6Class(
     ) {
       tool_errors <- list()
       withr::defer(warn_tool_errors(tool_errors))
+      start_agent_span(private$provider)

       while (!is.null(user_turn)) {
         assistant_chunks <- private$submit_turns(
@@ -551,6 +552,8 @@ Chat <- R6::R6Class(
     ) {
       tool_errors <- list()
       withr::defer(warn_tool_errors(tool_errors))
+      span <- start_agent_span(private$provider, active = FALSE)
+      withr::defer(span$end())

       while (!is.null(user_turn)) {
         assistant_chunks <- private$submit_turns_async(
@@ -627,7 +630,7 @@ Chat <- R6::R6Class(
       if (echo == "all") {
         cat_line(format(user_turn), prefix = "> ")
       }
-
+      span <- start_chat_span(private$provider)
       response <- chat_perform(
         provider = private$provider,
         mode = if (stream) "stream" else "value",
@@ -654,9 +657,11 @@ Chat <- R6::R6Class(

         result <- stream_merge_chunks(private$provider, result, chunk)
       }
+      record_chat_span_status(span, result)
       turn <- value_turn(private$provider, result, has_type = !is.null(type))
       turn <- match_tools(turn, private$tools)
     } else {
+      record_chat_span_status(span, response)
       turn <- value_turn(
         private$provider,
         response,
@@ -709,6 +714,8 @@ Chat <- R6::R6Class(
       type = NULL,
       yield_as_content = FALSE
     ) {
+      span <- start_chat_span(private$provider, active = FALSE)
+      withr::defer(span$end())
       response <- chat_perform(
         provider = private$provider,
         mode = if (stream) "async-stream" else "async-value",
@@ -735,10 +742,12 @@ Chat <- R6::R6Class(

         result <- stream_merge_chunks(private$provider, result, chunk)
       }
+      record_chat_span_status(span, result)
       turn <- value_turn(private$provider, result, has_type = !is.null(type))
     } else {
       result <- await(response)

+      record_chat_span_status(span, result)
       turn <- value_turn(private$provider, result, has_type = !is.null(type))
       text <- turn@text
       if (!is.null(text)) {
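
Two patterns are visible in these hunks: the synchronous methods open the span in the caller's frame (the default `active = TRUE`), while the async methods open a non-active session span and end it explicitly with `withr::defer(span$end())`, since the creating frame does not stay on the stack across awaits. A simplified sketch of the async shape, not actual ellmer code, where `perform_request()` is a hypothetical stand-in for the real `chat_perform()`/`await()` sequence:

    # Simplified sketch; perform_request() is a hypothetical placeholder.
    submit_turn_traced <- function(provider, turn, perform_request) {
      span <- start_chat_span(provider, active = FALSE)
      withr::defer(span$end())                # span closes even if we error out

      result <- perform_request(provider, turn)
      record_chat_span_status(span, result)   # records response model, id, usage
      result
    }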

R/content-tools.R

Lines changed: 5 additions & 0 deletions
@@ -145,12 +145,14 @@ invoke_tool <- function(request) {
     return(args)
   }

+  span <- start_tool_span(request)
   tryCatch(
     {
       result <- do.call(request@tool@fun, args)
       new_tool_result(request, result)
     },
     error = function(e) {
+      record_tool_error(span, e)
       new_tool_result(request, error = e)
     }
   )
@@ -168,12 +170,15 @@ on_load(
     return(args)
   }

+  span <- start_tool_span(request, active = FALSE)
+  withr::defer(span$end())
   tryCatch(
     {
       result <- await(do.call(request@tool@fun, args))
       new_tool_result(request, result)
     },
     error = function(e) {
+      record_tool_error(span, e)
       new_tool_result(request, error = e)
     }
   )
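
The key property of this wrapping is that tracing never changes what the model sees: a failing tool both records the error on the span and is still folded into a normal tool result. A minimal sketch (not actual ellmer code) that restates the error-path guarantee in one place, reusing the ellmer internals shown in the diff only for illustration:

    # Minimal sketch of the guarantee above; `request`, `args`, and
    # new_tool_result() are the ellmer internals shown in the diff.
    call_tool_traced <- function(request, args) {
      span <- start_tool_span(request)
      tryCatch(
        {
          new_tool_result(request, do.call(request@tool@fun, args))
        },
        error = function(e) {
          record_tool_error(span, e)           # exception event, status "error"
          new_tool_result(request, error = e)  # error flows back as a result
        }
      )
    }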

R/otel.R

Lines changed: 142 additions & 0 deletions
New file:

    # Starts an Open Telemetry span that abides by the semantic conventions for
    # Generative AI completions.
    #
    # See: https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/#inference
    start_chat_span <- function(
      provider,
      tracer = default_tracer(),
      scope = parent.frame(),
      active = TRUE
    ) {
      if (is.null(tracer) || !tracer$is_enabled()) {
        return(NULL)
      }
      # Ensure we set attributes relevant to sampling at span creation time.
      attributes <- list(
        "gen_ai.operation.name" = "chat",
        "gen_ai.system" = tolower(provider@name),
        "gen_ai.request.model" = provider@model
      )
      if (active) {
        tracer$start_span(
          name = sprintf("chat %s", provider@model),
          options = list(kind = "CLIENT"),
          attributes = attributes,
          scope = scope
        )
      } else {
        tracer$start_session(
          name = sprintf("chat %s", provider@model),
          options = list(kind = "CLIENT"),
          attributes = attributes,
          session_scope = scope
        )
      }
    }

    record_chat_span_status <- function(span, result) {
      if (is.null(span) || !span$is_recording()) {
        return(invisible(span))
      }
      if (!is.null(result$model)) {
        span$set_attribute("gen_ai.response.model", result$model)
      }
      if (!is.null(result$id)) {
        span$set_attribute("gen_ai.response.id", result$id)
      }
      if (!is.null(result$usage)) {
        span$set_attribute("gen_ai.usage.input_tokens", result$usage$prompt_tokens)
        span$set_attribute(
          "gen_ai.usage.output_tokens",
          result$usage$completion_tokens
        )
      }
      # TODO: Consider setting gen_ai.response.finish_reasons.
      span$set_status("ok")
    }

    # Starts an Open Telemetry span that abides by the semantic conventions for
    # Generative AI tool calls.
    #
    # See: https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/#execute-tool-span
    start_tool_span <- function(
      request,
      tracer = default_tracer(),
      scope = parent.frame(),
      active = TRUE
    ) {
      if (is.null(tracer) || !tracer$is_enabled()) {
        return(NULL)
      }
      attributes <- compact(list(
        "gen_ai.operation.name" = "execute_tool",
        "gen_ai.tool.name" = request@tool@name,
        "gen_ai.tool.description" = request@tool@description,
        "gen_ai.tool.call.id" = request@id
      ))
      if (active) {
        tracer$start_span(
          name = sprintf("execute_tool %s", request@tool@name),
          options = list(kind = "INTERNAL"),
          attributes = attributes,
          scope = scope
        )
      } else {
        tracer$start_session(
          name = sprintf("execute_tool %s", request@tool@name),
          options = list(kind = "INTERNAL"),
          attributes = attributes,
          session_scope = scope
        )
      }
    }

    record_tool_error <- function(span, error) {
      if (is.null(span) || !span$is_recording()) {
        return()
      }
      span$record_exception(error)
      span$set_status("error")
      span$set_attribute("error.type", class(error)[1])
    }

    # Starts an Open Telemetry span that abides by the semantic conventions for
    # Generative AI "agents".
    #
    # See: https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/#inference
    start_agent_span <- function(
      provider,
      tracer = default_tracer(),
      scope = parent.frame(),
      active = TRUE
    ) {
      if (is.null(tracer) || !tracer$is_enabled()) {
        return(NULL)
      }
      attributes <- list(
        "gen_ai.operation.name" = "chat",
        "gen_ai.system" = tolower(provider@name)
      )
      if (active) {
        tracer$start_span(
          name = "invoke_agent",
          options = list(kind = "CLIENT"),
          attributes = attributes,
          scope = scope
        )
      } else {
        tracer$start_session(
          name = "invoke_agent",
          options = list(kind = "CLIENT"),
          attributes = attributes,
          session_scope = scope
        )
      }
    }

    default_tracer <- function() {
      if (!is_installed("otel")) {
        return(NULL)
      }
      otel::get_tracer("ellmer")
    }
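
One design note worth making explicit: every helper in this file degrades to a no-op when tracing is unavailable. `default_tracer()` returns NULL if otel is not installed, `start_*_span()` returns NULL when the tracer is disabled, and the recording helpers guard on `is.null(span)`, so call sites in chat.R and content-tools.R never need to check for otel themselves. A hedged sketch of the resulting calling convention, with `provider` and `perform` as placeholders rather than ellmer internals:

    # Hedged sketch of the calling convention; `provider` and `perform` are
    # placeholders, not ellmer internals.
    with_chat_tracing <- function(provider, perform) {
      span <- start_chat_span(provider)        # NULL if otel is missing or disabled
      result <- perform(provider)
      record_chat_span_status(span, result)    # returns immediately when span is NULL
      result
    }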
