Skip to content

Commit b2f0da6

Browse files
committed
refactor(tracer): add checklocks annotations to Span struct
Add compile-time lock verification annotations to the Span struct, the most critical data structure in the tracing library. Signed-off-by: Kemal Akkoyun <kemal.akkoyun@datadoghq.com>
1 parent 7310a47 commit b2f0da6

File tree

13 files changed

+184
-24
lines changed

13 files changed

+184
-24
lines changed

ddtrace/tracer/abandonedspans.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ type abandonedSpanCandidate struct {
8282
Integration string
8383
}
8484

85+
// +checklocksignore
8586
func newAbandonedSpanCandidate(s *Span, finished bool) *abandonedSpanCandidate {
8687
var component string
8788
if v, ok := s.meta[ext.Component]; ok {

ddtrace/tracer/civisibility_tslv.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,7 @@ type ciVisibilityEvent struct {
160160
//
161161
// key - The tag key.
162162
// value - The tag value.
163+
// +checklocksignore
163164
func (e *ciVisibilityEvent) SetTag(key string, value interface{}) {
164165
e.span.SetTag(key, value)
165166
e.Content.Meta = e.span.meta
@@ -171,6 +172,7 @@ func (e *ciVisibilityEvent) SetTag(key string, value interface{}) {
171172
// Parameters:
172173
//
173174
// operationName - The new operation name.
175+
// +checklocksignore
174176
func (e *ciVisibilityEvent) SetOperationName(operationName string) {
175177
e.span.SetOperationName(operationName)
176178
e.Content.Name = e.span.name
@@ -246,6 +248,8 @@ type tslvSpan struct {
246248
// Returns:
247249
//
248250
// A pointer to the created ciVisibilityEvent.
251+
//
252+
// +checklocksignore
249253
func getCiVisibilityEvent(span *Span) *ciVisibilityEvent {
250254
switch span.spanType {
251255
case constants.SpanTypeTest:
@@ -270,6 +274,8 @@ func getCiVisibilityEvent(span *Span) *ciVisibilityEvent {
270274
// Returns:
271275
//
272276
// A pointer to the created ciVisibilityEvent.
277+
//
278+
// +checklocksignore
273279
func createTestEventFromSpan(span *Span) *ciVisibilityEvent {
274280
tSpan := createTslvSpan(span)
275281
tSpan.ParentID = 0
@@ -296,6 +302,8 @@ func createTestEventFromSpan(span *Span) *ciVisibilityEvent {
296302
// Returns:
297303
//
298304
// A pointer to the created ciVisibilityEvent.
305+
//
306+
// +checklocksignore
299307
func createTestSuiteEventFromSpan(span *Span) *ciVisibilityEvent {
300308
tSpan := createTslvSpan(span)
301309
tSpan.ParentID = 0
@@ -319,6 +327,8 @@ func createTestSuiteEventFromSpan(span *Span) *ciVisibilityEvent {
319327
// Returns:
320328
//
321329
// A pointer to the created ciVisibilityEvent.
330+
//
331+
// +checklocksignore
322332
func createTestModuleEventFromSpan(span *Span) *ciVisibilityEvent {
323333
tSpan := createTslvSpan(span)
324334
tSpan.ParentID = 0
@@ -341,6 +351,8 @@ func createTestModuleEventFromSpan(span *Span) *ciVisibilityEvent {
341351
// Returns:
342352
//
343353
// A pointer to the created ciVisibilityEvent.
354+
//
355+
// +checklocksignore
344356
func createTestSessionEventFromSpan(span *Span) *ciVisibilityEvent {
345357
tSpan := createTslvSpan(span)
346358
tSpan.ParentID = 0
@@ -362,6 +374,8 @@ func createTestSessionEventFromSpan(span *Span) *ciVisibilityEvent {
362374
// Returns:
363375
//
364376
// A pointer to the created ciVisibilityEvent.
377+
//
378+
// +checklocksignore
365379
func createSpanEventFromSpan(span *Span) *ciVisibilityEvent {
366380
tSpan := createTslvSpan(span)
367381
tSpan.SpanID = span.spanID
@@ -383,6 +397,8 @@ func createSpanEventFromSpan(span *Span) *ciVisibilityEvent {
383397
// Returns:
384398
//
385399
// The created tslvSpan.
400+
//
401+
// +checklocksignore
386402
func createTslvSpan(span *Span) tslvSpan {
387403
return tslvSpan{
388404
Name: span.name,

ddtrace/tracer/context.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ func SpanFromContext(ctx context.Context) (*Span, bool) {
7676
// StartSpanFromContext returns a new span with the given operation name and options. If a span
7777
// is found in the context, it will be used as the parent of the resulting span. If the ChildOf
7878
// option is passed, it will only be used as the parent if there is no span found in `ctx`.
79+
// +checklocksignore
7980
func StartSpanFromContext(ctx context.Context, operationName string, opts ...StartSpanOption) (*Span, context.Context) {
8081
// copy opts in case the caller reuses the slice in parallel
8182
// we will add at least 1, at most 2 items

ddtrace/tracer/payload_v1.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ func newPayloadV1() *payloadV1 {
103103
}
104104

105105
// push pushes a new item (a traceChunk)into the payload.
106+
// +checklocksignore
106107
func (p *payloadV1) push(t spanList) (stats payloadStats, err error) {
107108
// We need to hydrate the payload with everything we get from the spans.
108109
// Conceptually, our `t spanList` corresponds to one `traceChunk`.
@@ -421,6 +422,7 @@ func (p *payloadV1) encodeTraceChunks(bm bitmap, fieldID int, tc []traceChunk, s
421422
}
422423

423424
// encodeSpans encodes a list of spans associated with fieldID into p.buf in msgp format.
425+
// +checklocksignore
424426
func (p *payloadV1) encodeSpans(bm bitmap, fieldID int, spans spanList, st *stringTable) (bool, error) {
425427
if len(spans) == 0 || !bm.contains(uint32(fieldID)) {
426428
return false, nil
@@ -1069,6 +1071,7 @@ func decodeSpans(b []byte, st *stringTable) (spanList, []byte, error) {
10691071

10701072
// decode reads a span from a byte slice and populates the associated fields in the span.
10711073
// This should only be used with decoding v1.0 payloads.
1074+
// +checklocksignore
10721075
func (span *Span) decode(b []byte, st *stringTable) ([]byte, error) {
10731076
numFields, o, err := msgp.ReadMapHeaderBytes(b)
10741077
for range numFields {

ddtrace/tracer/rules_sampler.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,7 @@ func (sr *SamplingRule) EqualsFalseNegative(other *SamplingRule) bool {
187187
}
188188

189189
// match returns true when the span's details match all the expected values in the rule.
190+
// +checklocksignore
190191
func (sr *SamplingRule) match(s *Span) bool {
191192
if sr.Service != nil && !sr.Service.MatchString(s.service) {
192193
return false
@@ -476,6 +477,7 @@ func (rs *traceRulesSampler) sampleRules(span *Span) bool {
476477
return true
477478
}
478479

480+
// +checklocksignore
479481
func (rs *traceRulesSampler) applyRate(span *Span, rate float64, now time.Time, sampler samplernames.SamplerName) {
480482
// Use the helper method to apply the rate and execute sampling logic with the lock held
481483
span.applyRateWithLock(rate, func() {
@@ -547,6 +549,7 @@ func (rs *singleSpanRulesSampler) enabled() bool {
547549
// apply uses the sampling rules to determine the sampling rate for the
548550
// provided span. If the rules don't match, then it returns false and the span is not
549551
// modified.
552+
// +checklocksignore
550553
func (rs *singleSpanRulesSampler) apply(span *Span) bool {
551554
for _, rule := range rs.rules {
552555
if rule.match(span) {

ddtrace/tracer/sampler.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ func (r *rateSampler) SetRate(rate float64) {
9292
const knuthFactor = uint64(1111111111111111111)
9393

9494
// Sample returns true if the given span should be sampled.
95+
// +checklocksignore
9596
func (r *rateSampler) Sample(s *Span) bool {
9697
if r.rate == 1 {
9798
// fast path
@@ -185,6 +186,7 @@ func (ps *prioritySampler) readRatesJSON(rc io.ReadCloser) error {
185186

186187
// getRate returns the sampling rate to be used for the given span. Callers must
187188
// guard the span.
189+
// +checklocksignore
188190
func (ps *prioritySampler) getRate(spn *Span) float64 {
189191
key := serviceEnvKey{service: spn.service, env: spn.meta[ext.Environment]}
190192
ps.mu.RLock()
@@ -197,6 +199,7 @@ func (ps *prioritySampler) getRate(spn *Span) float64 {
197199

198200
// apply applies sampling priority to the given span. Caller must ensure it is safe
199201
// to modify the span.
202+
// +checklocksignore
200203
func (ps *prioritySampler) apply(spn *Span) {
201204
rate := ps.getRate(spn)
202205
if sampledByRate(spn.traceID, rate) {

ddtrace/tracer/span.go

Lines changed: 64 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
//msgp:ignore inheritedData
77
//go:generate go run github.com/tinylib/msgp -unexported -marshal=false -o=span_msgp.go -tests=false
8+
//go:generate go run ../../scripts/msgp_checklocks_ignore.go -type Span -file span_msgp.go
89

910
package tracer
1011

@@ -70,6 +71,7 @@ type errorConfig struct {
7071
// properties set at any time during normal operation! This is used for testing only,
7172
// and should not be used in non-test code, or you may run into performance or other
7273
// issues.
74+
// +checklocksignore
7375
func (s *Span) AsMap() map[string]interface{} {
7476
m := make(map[string]interface{})
7577
if s == nil {
@@ -100,6 +102,7 @@ func (s *Span) AsMap() map[string]interface{} {
100102
return m
101103
}
102104

105+
// +checklocksignore
103106
func (s *Span) spanEventsAsJSONString() string {
104107
if !s.supportsEvents {
105108
return s.meta["events"]
@@ -118,36 +121,56 @@ func (s *Span) spanEventsAsJSONString() string {
118121
// Span represents a computation. Callers must call Finish when a Span is
119122
// complete to ensure it's submitted.
120123
type Span struct {
121-
mu locking.RWMutex `msg:"-"` // all fields are protected by this RWMutex
122-
123-
name string `msg:"name"` // operation name
124-
service string `msg:"service"` // service name (i.e. "grpc.server", "http.request")
125-
resource string `msg:"resource"` // resource name (i.e. "/user?id=123", "SELECT * FROM users")
126-
spanType string `msg:"type"` // protocol associated with the span (i.e. "web", "db", "cache")
127-
start int64 `msg:"start"` // span start time expressed in nanoseconds since epoch
128-
duration int64 `msg:"duration"` // duration of the span expressed in nanoseconds
129-
meta map[string]string `msg:"meta,omitempty"` // arbitrary map of metadata
130-
metaStruct metaStructMap `msg:"meta_struct,omitempty"` // arbitrary map of metadata with structured values
131-
metrics map[string]float64 `msg:"metrics,omitempty"` // arbitrary map of numeric metrics
132-
spanID uint64 `msg:"span_id"` // identifier of this span
133-
traceID uint64 `msg:"trace_id"` // lower 64-bits of the root span identifier
134-
parentID uint64 `msg:"parent_id"` // identifier of the span's direct parent
135-
error int32 `msg:"error"` // error status of the span; 0 means no errors
136-
spanLinks []SpanLink `msg:"span_links,omitempty"` // links to other spans
137-
spanEvents []spanEvent `msg:"span_events,omitempty"` // events produced related to this span
124+
// guards below fields
125+
mu locking.RWMutex `msg:"-"`
126+
127+
// +checklocks:mu
128+
name string `msg:"name"` // operation name
129+
// +checklocks:mu
130+
service string `msg:"service"` // service name (i.e. "grpc.server", "http.request")
131+
// +checklocks:mu
132+
resource string `msg:"resource"` // resource name (i.e. "/user?id=123", "SELECT * FROM users")
133+
// +checklocks:mu
134+
spanType string `msg:"type"` // protocol associated with the span (i.e. "web", "db", "cache")
135+
// +checklocks:mu
136+
start int64 `msg:"start"` // span start time expressed in nanoseconds since epoch
137+
// +checklocks:mu
138+
duration int64 `msg:"duration"` // duration of the span expressed in nanoseconds
139+
// +checklocks:mu
140+
meta map[string]string `msg:"meta,omitempty"` // arbitrary map of metadata
141+
// +checklocks:mu
142+
metaStruct metaStructMap `msg:"meta_struct,omitempty"` // arbitrary map of metadata with structured values
143+
// +checklocks:mu
144+
metrics map[string]float64 `msg:"metrics,omitempty"` // arbitrary map of numeric metrics
145+
// +checklocks:mu
146+
spanID uint64 `msg:"span_id"` // identifier of this span
147+
// +checklocks:mu
148+
traceID uint64 `msg:"trace_id"` // lower 64-bits of the root span identifier
149+
// +checklocks:mu
150+
parentID uint64 `msg:"parent_id"` // identifier of the span's direct parent
151+
// +checklocks:mu
152+
error int32 `msg:"error"` // error status of the span; 0 means no errors
153+
// +checklocks:mu
154+
spanLinks []SpanLink `msg:"span_links,omitempty"` // links to other spans
155+
// +checklocks:mu
156+
spanEvents []spanEvent `msg:"span_events,omitempty"` // events produced related to this span
138157

139158
goExecTraced bool `msg:"-"`
140159
noDebugStack bool `msg:"-"` // disables debug stack traces
141-
finished bool `msg:"-"` // true if the span has been submitted to a tracer. Can only be read/modified if the trace is locked.
142160
context *SpanContext `msg:"-"` // span propagation context
143-
integration string `msg:"-"` // where the span was started from, such as a specific contrib or "manual"
144161
supportsEvents bool `msg:"-"` // whether the span supports native span events or not
145162

146-
pprofCtxActive context.Context `msg:"-"` // contains pprof.WithLabel labels to tell the profiler more about this span
163+
// +checklocks:mu
164+
finished bool `msg:"-"` // true if the span has been submitted to a tracer. Can only be read/modified if the trace is locked.
165+
// +checklocks:mu
166+
integration string `msg:"-"` // where the span was started from, such as a specific contrib or "manual"
167+
// +checklocks:mu
168+
pprofCtxActive context.Context `msg:"-"` // contains pprof.WithLabel labels to tell the profiler more about this span
169+
147170
pprofCtxRestore context.Context `msg:"-"` // contains pprof.WithLabel labels of the parent span (if any) that need to be restored when this span finishes
148171

149-
// ends execution tracer (runtime/trace) task, if started
150-
taskEnd func() // +checklocks:mu
172+
// +checklocks:mu
173+
taskEnd func() // ends execution tracer (runtime/trace) task, if started
151174
}
152175

153176
// Context yields the SpanContext for this Span. Note that the return
@@ -540,6 +563,8 @@ func (s *Span) StartChild(operationName string, opts ...StartSpanOption) *Span {
540563

541564
// setSamplingPriorityLocked updates the sampling priority.
542565
// It also updates the trace's sampling priority.
566+
// s.mu must be held for writing.
567+
// +checklocks:s.mu
543568
func (s *Span) setSamplingPriorityLocked(priority int, sampler samplernames.SamplerName) {
544569
assert.RWMutexLocked(&s.mu)
545570
// We don't lock spans when flushing, so we could have a data race when
@@ -556,6 +581,8 @@ func (s *Span) setSamplingPriorityLocked(priority int, sampler samplernames.Samp
556581
// If the trace is locked, the sampling priority is forced to the given value.
557582
//
558583
// This function is should only be used when applying a manual keep or drop decision.
584+
// s.mu must be held for writing.
585+
// +checklocks:s.mu
559586
func (s *Span) forceSetSamplingPriorityLocked(priority int, sampler samplernames.SamplerName) {
560587
assert.RWMutexLocked(&s.mu)
561588
// We don't lock spans when flushing, so we could have a data race when
@@ -569,7 +596,8 @@ func (s *Span) forceSetSamplingPriorityLocked(priority int, sampler samplernames
569596
}
570597

571598
// setTagErrorLocked sets the error tag. It accounts for various valid scenarios.
572-
// This method assumes the span lock is already held.
599+
// s.mu must be held for writing.
600+
// +checklocks:s.mu
573601
func (s *Span) setTagErrorLocked(value interface{}, cfg errorConfig) {
574602
assert.RWMutexLocked(&s.mu)
575603
setError := func(yes bool) {
@@ -659,12 +687,14 @@ func (s *Span) setMeta(key, v string) {
659687
}
660688

661689
// setMetaLocked sets a string tag. This method assumes the span lock is already held.
690+
// +checklocks:s.mu
662691
func (s *Span) setMetaLocked(key, v string) {
663692
assert.RWMutexLocked(&s.mu)
664693
s.setMetaInit(key, v)
665694
}
666695

667696
// setMetaInit sets a string tag without acquiring the lock and asserting the lock is held.
697+
// +checklocksignore
668698
func (s *Span) setMetaInit(key, v string) {
669699
if s.meta == nil {
670700
s.meta = make(map[string]string, 1)
@@ -685,6 +715,7 @@ func (s *Span) setMetaInit(key, v string) {
685715
}
686716

687717
// setMetaStructLocked sets structured metadata. This method assumes the span lock is already held.
718+
// +checklocks:s.mu
688719
func (s *Span) setMetaStructLocked(key string, v any) {
689720
assert.RWMutexLocked(&s.mu)
690721
if s.metaStruct == nil {
@@ -694,6 +725,7 @@ func (s *Span) setMetaStructLocked(key string, v any) {
694725
}
695726

696727
// setTagBoolLocked sets a boolean tag on the span. This method assumes the span lock is already held.
728+
// +checklocks:s.mu
697729
func (s *Span) setTagBoolLocked(key string, v bool) {
698730
assert.RWMutexLocked(&s.mu)
699731
switch key {
@@ -722,6 +754,7 @@ func (s *Span) setTagBoolLocked(key string, v bool) {
722754

723755
// setMetric sets a numeric tag during span initialization (before the span is published).
724756
// This method should only be used during span construction in spanStart and StartSpan.
757+
// +checklocksignore
725758
func (s *Span) setMetricInit(key string, v float64) {
726759
if s.metrics == nil {
727760
s.metrics = make(map[string]float64, 1)
@@ -740,6 +773,7 @@ func (s *Span) setMetric(key string, v float64) {
740773

741774
// setMetricLocked sets a numeric tag, in our case called a metric. This method
742775
// assumes the span lock is already held.
776+
// +checklocks:s.mu
743777
func (s *Span) setMetricLocked(key string, v float64) {
744778
assert.RWMutexLocked(&s.mu)
745779
if s.metrics == nil {
@@ -778,7 +812,9 @@ func (s *Span) AddLink(link SpanLink) {
778812
}
779813

780814
// serializeSpanLinksInMeta saves span links as a JSON string under `Span[meta][_dd.span_links]`.
815+
// +checklocks:s.mu
781816
func (s *Span) serializeSpanLinksInMeta() {
817+
assert.RWMutexLocked(&s.mu)
782818
if len(s.spanLinks) == 0 {
783819
return
784820
}
@@ -795,7 +831,9 @@ func (s *Span) serializeSpanLinksInMeta() {
795831

796832
// serializeSpanEvents sets the span events from the current span in the correct transport, depending on whether the
797833
// agent supports the native method or not.
834+
// +checklocks:s.mu
798835
func (s *Span) serializeSpanEvents() {
836+
assert.RWMutexLocked(&s.mu)
799837
if len(s.spanEvents) == 0 {
800838
return
801839
}
@@ -988,6 +1026,7 @@ func obfuscatedResource(o *obfuscate.Obfuscator, typ, resource string) string {
9881026

9891027
// shouldKeep reports whether the trace should be kept.
9901028
// a single span being kept implies the whole trace being kept.
1029+
// +checklocksignore
9911030
func shouldKeep(s *Span) bool {
9921031
if p, ok := s.context.SamplingPriority(); ok && p > 0 {
9931032
// positive sampling priorities stay
@@ -1005,6 +1044,7 @@ func shouldKeep(s *Span) bool {
10051044

10061045
// shouldComputeStats mentions whether this span needs to have stats computed for.
10071046
// Warning: callers must guard!
1047+
// +checklocksignore
10081048
func shouldComputeStats(s *Span) bool {
10091049
if v, ok := s.metrics[keyMeasured]; ok && v == 1 {
10101050
return true
@@ -1047,6 +1087,7 @@ func (s *Span) String() string {
10471087
}
10481088

10491089
// Format implements fmt.Formatter.
1090+
// +checklocksignore
10501091
func (s *Span) Format(f fmt.State, c rune) {
10511092
if s == nil {
10521093
fmt.Fprintf(f, "<nil>")

0 commit comments

Comments
 (0)