Skip to content

Commit 3e072af

Browse files
nshalmanclaude
andcommitted
Implement tracing-opentelemetry
Co-Authored-By: Claude <[email protected]>
1 parent 69b5adc commit 3e072af

File tree

9 files changed

+2058
-1039
lines changed

9 files changed

+2058
-1039
lines changed

Cargo.lock

Lines changed: 796 additions & 527 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

dropshot/Cargo.toml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,14 @@ tokio-rustls = "0.25.0"
5050
toml = "0.8.23"
5151
waitgroup = "0.1.2"
5252

53+
opentelemetry = { version = "0.30", optional = true }
54+
opentelemetry-http = { version = "0.30", features = ["hyper"], optional = true }
55+
opentelemetry-semantic-conventions = { version = "0.30", optional = true }
56+
opentelemetry-otlp = { version = "0.30", features = ["tonic"], optional = true }
57+
opentelemetry_sdk = { version = "0.30", features = ["rt-tokio"], optional = true }
58+
opentelemetry-stdout = { version = "0.30", optional = true }
59+
tracing-opentelemetry = { version = "0.31", optional = true }
60+
5361
[dependencies.chrono]
5462
version = "0.4.41"
5563
features = [ "serde", "std", "clock" ]
@@ -139,6 +147,7 @@ version_check = "0.9.5"
139147
usdt-probes = ["usdt/asm"]
140148
internal-docs = ["simple-mermaid"]
141149
tracing = ["dep:tracing", "dep:tracing-subscriber"]
150+
otel-tracing = ["tracing", "opentelemetry", "opentelemetry-http", "opentelemetry-semantic-conventions", "opentelemetry-otlp", "opentelemetry_sdk", "opentelemetry-stdout", "tracing-opentelemetry"]
142151

143152
[package.metadata.docs.rs]
144153
features = ["internal-docs"]

dropshot/examples/otel.rs

Lines changed: 317 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,317 @@
1+
// Copyright 2025 Oxide Computer Company
2+
//! Example use of Dropshot with OpenTelemetry integration.
3+
//!
4+
//! Dropshot's built-in OpenTelemetry support will automatically parse
5+
//! standard OTEL environment variables.
6+
//! If you launch an otel-collector or otel-enabled jaeger-all-in-one
7+
//! listening for otlp over http, then you can do:
8+
//!
9+
//! ```bash
10+
//! export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318
11+
//! export OTEL_EXPORTER_OTLP_PROTOCOL=http/protobuf
12+
//! cargo run --features=otel-tracing --example otel&
13+
//! curl http://localhost:4000/get
14+
//! ```
15+
//!
16+
//! And you should see an example trace.
17+
18+
use dropshot::endpoint;
19+
use dropshot::ApiDescription;
20+
use dropshot::ConfigDropshot;
21+
use dropshot::ConfigLogging;
22+
use dropshot::ConfigLoggingLevel;
23+
use dropshot::HttpError;
24+
use dropshot::HttpResponseOk;
25+
use dropshot::HttpResponseUpdatedNoContent;
26+
use dropshot::HttpServerStarter;
27+
use dropshot::RequestContext;
28+
use dropshot::TypedBody;
29+
use schemars::JsonSchema;
30+
use serde::Deserialize;
31+
use serde::Serialize;
32+
use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
33+
#[cfg(any(feature = "tracing", feature = "otel-tracing"))]
34+
use tracing;
35+
36+
#[tokio::main]
37+
async fn main() -> Result<(), String> {
38+
let config_dropshot = ConfigDropshot {
39+
bind_address: "127.0.0.1:4000".parse().unwrap(),
40+
..Default::default()
41+
};
42+
43+
// For simplicity, we'll configure an "info"-level logger that writes to
44+
// stderr assuming that it's a terminal.
45+
let config_logging =
46+
ConfigLogging::StderrTerminal { level: ConfigLoggingLevel::Info };
47+
let log = config_logging
48+
.to_logger("example-basic")
49+
.map_err(|error| format!("failed to create logger: {}", error))?;
50+
51+
// Initialize tracing with both slog bridge and OpenTelemetry support
52+
#[cfg(any(feature = "tracing", feature = "otel-tracing"))]
53+
let _tracing_guard = dropshot::tracing_support::init_tracing(&log)
54+
.await
55+
.map_err(|e| format!("failed to initialize tracing: {}", e))?;
56+
57+
// Build a description of the API.
58+
let mut api = ApiDescription::new();
59+
api.register(example_api_get_counter).unwrap();
60+
api.register(example_api_put_counter).unwrap();
61+
api.register(example_api_get).unwrap();
62+
api.register(example_api_error).unwrap();
63+
api.register(example_api_panic).unwrap();
64+
api.register(example_api_sleep).unwrap();
65+
api.register(example_api_exit).unwrap();
66+
67+
// The functions that implement our API endpoints will share this context.
68+
let api_context = ExampleContext::new();
69+
70+
// Set up the server.
71+
let server =
72+
HttpServerStarter::new(&config_dropshot, api, api_context, &log)
73+
.map_err(|error| format!("failed to create server: {}", error))?
74+
.start();
75+
76+
let shutdown = server.wait_for_shutdown();
77+
78+
tokio::task::spawn(async move {
79+
loop {
80+
if server.app_private().shutdown.load(Ordering::SeqCst) {
81+
break;
82+
} else {
83+
tokio::time::sleep(tokio::time::Duration::from_secs(1)).await;
84+
}
85+
}
86+
server.close().await.unwrap();
87+
});
88+
89+
// From a separate task, wait for the server to stop.
90+
shutdown.await
91+
}
92+
93+
/// Application-specific example context (state shared by handler functions)
94+
#[derive(Debug)]
95+
struct ExampleContext {
96+
/// counter that can be manipulated by requests to the HTTP API
97+
counter: AtomicU64,
98+
shutdown: AtomicBool,
99+
}
100+
101+
impl ExampleContext {
102+
/// Return a new ExampleContext.
103+
pub fn new() -> ExampleContext {
104+
ExampleContext {
105+
counter: AtomicU64::new(0),
106+
shutdown: AtomicBool::new(false),
107+
}
108+
}
109+
}
110+
111+
// HTTP API interface
112+
113+
/// `CounterValue` represents the value of the API's counter, either as the
114+
/// response to a GET request to fetch the counter or as the body of a PUT
115+
/// request to update the counter.
116+
#[derive(Debug, Deserialize, Serialize, JsonSchema)]
117+
struct CounterValue {
118+
counter: u64,
119+
}
120+
121+
/// Demonstrates creating child spans for internal operations using tracing instrumentation
122+
#[endpoint {
123+
method = GET,
124+
path = "/get",
125+
}]
126+
#[cfg_attr(
127+
any(feature = "tracing", feature = "otel-tracing"),
128+
tracing::instrument(skip(rqctx), fields(counter_processing = tracing::field::Empty)))]
129+
async fn example_api_get(
130+
rqctx: RequestContext<ExampleContext>,
131+
) -> Result<HttpResponseOk<CounterValue>, HttpError> {
132+
#[cfg(any(feature = "tracing", feature = "otel-tracing"))]
133+
tracing::info!("Starting counter fetch with processing");
134+
135+
// Simulate some work
136+
fetch_counter_with_delay().await;
137+
138+
let api_context = rqctx.context();
139+
let counter_value = api_context.counter.load(Ordering::SeqCst);
140+
141+
// Do some "processing" that would benefit from being traced
142+
let processed_value = process_counter_value(counter_value).await;
143+
144+
// Record the processing result in the span
145+
#[cfg(any(feature = "tracing", feature = "otel-tracing"))]
146+
tracing::Span::current().record("counter_processing", processed_value);
147+
148+
#[cfg(any(feature = "tracing", feature = "otel-tracing"))]
149+
tracing::info!(
150+
processed_value = processed_value,
151+
"Counter processing completed"
152+
);
153+
154+
Ok(HttpResponseOk(CounterValue { counter: processed_value }))
155+
}
156+
157+
#[cfg_attr(
158+
any(feature = "tracing", feature = "otel-tracing"),
159+
tracing::instrument
160+
)]
161+
async fn fetch_counter_with_delay() {
162+
#[cfg(any(feature = "tracing", feature = "otel-tracing"))]
163+
tracing::debug!("Simulating work");
164+
tokio::time::sleep(std::time::Duration::from_millis(10)).await;
165+
}
166+
167+
#[cfg_attr(
168+
any(feature = "tracing", feature = "otel-tracing"),
169+
tracing::instrument
170+
)]
171+
async fn process_counter_value(counter_value: u64) -> u64 {
172+
#[cfg(any(feature = "tracing", feature = "otel-tracing"))]
173+
tracing::debug!(input_value = counter_value, "Processing counter value");
174+
tokio::time::sleep(std::time::Duration::from_millis(5)).await;
175+
let result = counter_value * 2; // Some arbitrary processing
176+
#[cfg(any(feature = "tracing", feature = "otel-tracing"))]
177+
tracing::debug!(output_value = result, "Counter processing complete");
178+
result
179+
}
180+
181+
/// Fetch the current value of the counter.
182+
#[endpoint {
183+
method = GET,
184+
path = "/counter",
185+
}]
186+
#[cfg_attr(
187+
any(feature = "tracing", feature = "otel-tracing"),
188+
tracing::instrument(skip(rqctx))
189+
)]
190+
async fn example_api_get_counter(
191+
rqctx: RequestContext<ExampleContext>,
192+
) -> Result<HttpResponseOk<CounterValue>, HttpError> {
193+
let api_context = rqctx.context();
194+
let counter = api_context.counter.load(Ordering::SeqCst);
195+
196+
#[cfg(any(feature = "tracing", feature = "otel-tracing"))]
197+
tracing::info!(counter_value = counter, "Retrieved counter value");
198+
199+
Ok(HttpResponseOk(CounterValue { counter }))
200+
}
201+
202+
/// Demonstrates error tracing - errors will be marked on the span
203+
#[endpoint {
204+
method = GET,
205+
path = "/error",
206+
}]
207+
#[cfg_attr(
208+
any(feature = "tracing", feature = "otel-tracing"),
209+
tracing::instrument(skip(_rqctx))
210+
)]
211+
async fn example_api_error(
212+
_rqctx: RequestContext<ExampleContext>,
213+
) -> Result<HttpResponseOk<CounterValue>, HttpError> {
214+
// XXX this should always show up in the logs
215+
#[cfg(any(feature = "tracing", feature = "otel-tracing"))]
216+
tracing::warn!("About to return an error for demonstration XXX FINDME");
217+
let error =
218+
HttpError::for_internal_error("This endpoint is broken".to_string());
219+
#[cfg(any(feature = "tracing", feature = "otel-tracing"))]
220+
tracing::error!(error = ?error, "Returning demonstration error");
221+
Err(error)
222+
}
223+
224+
/// Demonstrates panic handling - panics are converted to 500 errors and traced
225+
#[endpoint {
226+
method = GET,
227+
path = "/panic",
228+
}]
229+
#[cfg_attr(
230+
any(feature = "tracing", feature = "otel-tracing"),
231+
tracing::instrument(skip(_rqctx))
232+
)]
233+
async fn example_api_panic(
234+
_rqctx: RequestContext<ExampleContext>,
235+
) -> Result<HttpResponseOk<CounterValue>, HttpError> {
236+
panic!("This handler panics to demonstrate error tracing");
237+
}
238+
239+
/// Takes too long so the client disconnects
240+
#[endpoint {
241+
method = GET,
242+
path = "/sleep",
243+
}]
244+
#[cfg_attr(
245+
any(feature = "tracing", feature = "otel-tracing"),
246+
tracing::instrument(skip(_rqctx))
247+
)]
248+
async fn example_api_sleep(
249+
_rqctx: RequestContext<ExampleContext>,
250+
) -> Result<HttpResponseOk<CounterValue>, HttpError> {
251+
tokio::time::sleep(tokio::time::Duration::from_secs(2)).await;
252+
Err(HttpError::for_internal_error(
253+
"This endpoint takes too long".to_string(),
254+
))
255+
}
256+
257+
/// Exit shortcut
258+
#[endpoint {
259+
method = GET,
260+
path = "/exit",
261+
}]
262+
#[cfg_attr(
263+
any(feature = "tracing", feature = "otel-tracing"),
264+
tracing::instrument(skip(rqctx))
265+
)]
266+
async fn example_api_exit(
267+
rqctx: RequestContext<ExampleContext>,
268+
) -> Result<HttpResponseUpdatedNoContent, HttpError> {
269+
rqctx.context().shutdown.store(true, Ordering::SeqCst);
270+
Ok(HttpResponseUpdatedNoContent())
271+
}
272+
273+
/// Update the current value of the counter. Note that the special value of 10
274+
/// is not allowed (just to demonstrate how to generate an error).
275+
#[endpoint {
276+
method = PUT,
277+
path = "/counter",
278+
}]
279+
#[cfg_attr(
280+
any(feature = "tracing", feature = "otel-tracing"),
281+
tracing::instrument(skip(rqctx, update), fields(new_value = tracing::field::Empty)))]
282+
async fn example_api_put_counter(
283+
rqctx: RequestContext<ExampleContext>,
284+
update: TypedBody<CounterValue>,
285+
) -> Result<HttpResponseUpdatedNoContent, HttpError> {
286+
let api_context = rqctx.context();
287+
let updated_value = update.into_inner();
288+
289+
// Record the new value in the span
290+
#[cfg(any(feature = "tracing", feature = "otel-tracing"))]
291+
tracing::Span::current().record("new_value", updated_value.counter);
292+
#[cfg(any(feature = "tracing", feature = "otel-tracing"))]
293+
tracing::info!(
294+
new_counter_value = updated_value.counter,
295+
"Updating counter"
296+
);
297+
298+
if updated_value.counter == 10 {
299+
#[cfg(any(feature = "tracing", feature = "otel-tracing"))]
300+
tracing::warn!(
301+
rejected_value = updated_value.counter,
302+
"Rejecting forbidden value"
303+
);
304+
Err(HttpError::for_bad_request(
305+
Some(String::from("BadInput")),
306+
format!("do not like the number {}", updated_value.counter),
307+
))
308+
} else {
309+
api_context.counter.store(updated_value.counter, Ordering::SeqCst);
310+
#[cfg(any(feature = "tracing", feature = "otel-tracing"))]
311+
tracing::info!(
312+
updated_counter = updated_value.counter,
313+
"Counter updated successfully"
314+
);
315+
Ok(HttpResponseUpdatedNoContent())
316+
}
317+
}

dropshot/src/handler.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,7 @@ where
287287
/// This type is not exported to Dropshot consumers; it is purely an internal
288288
/// implementation detail of the interface between `HttpHandlerFunc` and the
289289
/// server.
290+
#[derive(Debug)]
290291
pub enum HandlerError {
291292
/// An error returned by a fallible handler function itself.
292293
///

dropshot/src/lib.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -852,11 +852,15 @@ mod from_map;
852852
mod handler;
853853
mod http_util;
854854
mod logging;
855+
#[cfg(feature = "otel-tracing")]
856+
pub mod otel;
855857
mod pagination;
856858
mod router;
857859
mod schema_util;
858860
mod server;
859861
mod to_map;
862+
#[cfg(any(feature = "tracing", feature = "otel-tracing"))]
863+
pub mod tracing_support;
860864
mod type_util;
861865
mod versioning;
862866
mod websocket;
@@ -946,8 +950,6 @@ pub use server::ServerBuilder;
946950
pub use server::ServerContext;
947951
pub use server::ShutdownWaitFuture;
948952
pub use server::{HttpServer, HttpServerStarter};
949-
#[cfg(feature = "tracing")]
950-
pub use tracing::{debug, error, info, trace, warn}; // Re-export tracing macros for convenience
951953
pub use versioning::ClientSpecifiesVersionInHeader;
952954
pub use versioning::DynamicVersionPolicy;
953955
pub use versioning::VersionPolicy;

0 commit comments

Comments
 (0)