Skip to content

Commit 3df894a

Browse files
committed
refactor(tracer): add checklocks annotations to Span struct
Add compile-time lock verification annotations to the Span struct, the most critical data structure in the tracing library. Signed-off-by: Kemal Akkoyun <kemal.akkoyun@datadoghq.com>
1 parent 8dbb9ba commit 3df894a

File tree

13 files changed

+183
-22
lines changed

13 files changed

+183
-22
lines changed

ddtrace/tracer/abandonedspans.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ type abandonedSpanCandidate struct {
8282
Integration string
8383
}
8484

85+
// +checklocksignore
8586
func newAbandonedSpanCandidate(s *Span, finished bool) *abandonedSpanCandidate {
8687
var component string
8788
if v, ok := s.meta[ext.Component]; ok {

ddtrace/tracer/civisibility_tslv.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,7 @@ type ciVisibilityEvent struct {
159159
//
160160
// key - The tag key.
161161
// value - The tag value.
162+
// +checklocksignore
162163
func (e *ciVisibilityEvent) SetTag(key string, value interface{}) {
163164
e.span.SetTag(key, value)
164165
e.Content.Meta = e.span.meta
@@ -170,6 +171,7 @@ func (e *ciVisibilityEvent) SetTag(key string, value interface{}) {
170171
// Parameters:
171172
//
172173
// operationName - The new operation name.
174+
// +checklocksignore
173175
func (e *ciVisibilityEvent) SetOperationName(operationName string) {
174176
e.span.SetOperationName(operationName)
175177
e.Content.Name = e.span.name
@@ -245,6 +247,8 @@ type tslvSpan struct {
245247
// Returns:
246248
//
247249
// A pointer to the created ciVisibilityEvent.
250+
//
251+
// +checklocksignore
248252
func getCiVisibilityEvent(span *Span) *ciVisibilityEvent {
249253
switch span.spanType {
250254
case constants.SpanTypeTest:
@@ -269,6 +273,8 @@ func getCiVisibilityEvent(span *Span) *ciVisibilityEvent {
269273
// Returns:
270274
//
271275
// A pointer to the created ciVisibilityEvent.
276+
//
277+
// +checklocksignore
272278
func createTestEventFromSpan(span *Span) *ciVisibilityEvent {
273279
tSpan := createTslvSpan(span)
274280
tSpan.ParentID = 0
@@ -295,6 +301,8 @@ func createTestEventFromSpan(span *Span) *ciVisibilityEvent {
295301
// Returns:
296302
//
297303
// A pointer to the created ciVisibilityEvent.
304+
//
305+
// +checklocksignore
298306
func createTestSuiteEventFromSpan(span *Span) *ciVisibilityEvent {
299307
tSpan := createTslvSpan(span)
300308
tSpan.ParentID = 0
@@ -318,6 +326,8 @@ func createTestSuiteEventFromSpan(span *Span) *ciVisibilityEvent {
318326
// Returns:
319327
//
320328
// A pointer to the created ciVisibilityEvent.
329+
//
330+
// +checklocksignore
321331
func createTestModuleEventFromSpan(span *Span) *ciVisibilityEvent {
322332
tSpan := createTslvSpan(span)
323333
tSpan.ParentID = 0
@@ -340,6 +350,8 @@ func createTestModuleEventFromSpan(span *Span) *ciVisibilityEvent {
340350
// Returns:
341351
//
342352
// A pointer to the created ciVisibilityEvent.
353+
//
354+
// +checklocksignore
343355
func createTestSessionEventFromSpan(span *Span) *ciVisibilityEvent {
344356
tSpan := createTslvSpan(span)
345357
tSpan.ParentID = 0
@@ -361,6 +373,8 @@ func createTestSessionEventFromSpan(span *Span) *ciVisibilityEvent {
361373
// Returns:
362374
//
363375
// A pointer to the created ciVisibilityEvent.
376+
//
377+
// +checklocksignore
364378
func createSpanEventFromSpan(span *Span) *ciVisibilityEvent {
365379
tSpan := createTslvSpan(span)
366380
tSpan.SpanID = span.spanID
@@ -382,6 +396,8 @@ func createSpanEventFromSpan(span *Span) *ciVisibilityEvent {
382396
// Returns:
383397
//
384398
// The created tslvSpan.
399+
//
400+
// +checklocksignore
385401
func createTslvSpan(span *Span) tslvSpan {
386402
return tslvSpan{
387403
Name: span.name,

ddtrace/tracer/context.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ func SpanFromContext(ctx context.Context) (*Span, bool) {
7676
// StartSpanFromContext returns a new span with the given operation name and options. If a span
7777
// is found in the context, it will be used as the parent of the resulting span. If the ChildOf
7878
// option is passed, it will only be used as the parent if there is no span found in `ctx`.
79+
// +checklocksignore
7980
func StartSpanFromContext(ctx context.Context, operationName string, opts ...StartSpanOption) (*Span, context.Context) {
8081
// copy opts in case the caller reuses the slice in parallel
8182
// we will add at least 1, at most 2 items

ddtrace/tracer/payload_v1.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ func newPayloadV1() *payloadV1 {
102102
}
103103

104104
// push pushes a new item (a traceChunk)into the payload.
105+
// +checklocksignore
105106
func (p *payloadV1) push(t spanList) (stats payloadStats, err error) {
106107
// We need to hydrate the payload with everything we get from the spans.
107108
// Conceptually, our `t spanList` corresponds to one `traceChunk`.
@@ -420,6 +421,7 @@ func (p *payloadV1) encodeTraceChunks(bm bitmap, fieldID int, tc []traceChunk, s
420421
}
421422

422423
// encodeSpans encodes a list of spans associated with fieldID into p.buf in msgp format.
424+
// +checklocksignore
423425
func (p *payloadV1) encodeSpans(bm bitmap, fieldID int, spans spanList, st *stringTable) (bool, error) {
424426
if len(spans) == 0 || !bm.contains(uint32(fieldID)) {
425427
return false, nil
@@ -1080,6 +1082,7 @@ func decodeSpans(b []byte, st *stringTable) (spanList, []byte, error) {
10801082

10811083
// decode reads a span from a byte slice and populates the associated fields in the span.
10821084
// This should only be used with decoding v1.0 payloads.
1085+
// +checklocksignore
10831086
func (span *Span) decode(b []byte, st *stringTable) ([]byte, error) {
10841087
numFields, o, err := msgp.ReadMapHeaderBytes(b)
10851088
for range numFields {

ddtrace/tracer/rules_sampler.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,7 @@ func (sr *SamplingRule) EqualsFalseNegative(other *SamplingRule) bool {
187187
}
188188

189189
// match returns true when the span's details match all the expected values in the rule.
190+
// +checklocksignore
190191
func (sr *SamplingRule) match(s *Span) bool {
191192
if sr.Service != nil && !sr.Service.MatchString(s.service) {
192193
return false
@@ -476,6 +477,7 @@ func (rs *traceRulesSampler) sampleRules(span *Span) bool {
476477
return true
477478
}
478479

480+
// +checklocksignore
479481
func (rs *traceRulesSampler) applyRate(span *Span, rate float64, now time.Time, sampler samplernames.SamplerName) {
480482
// Use the helper method to apply the rate and execute sampling logic with the lock held
481483
span.applyRateWithLock(rate, func() {
@@ -547,6 +549,7 @@ func (rs *singleSpanRulesSampler) enabled() bool {
547549
// apply uses the sampling rules to determine the sampling rate for the
548550
// provided span. If the rules don't match, then it returns false and the span is not
549551
// modified.
552+
// +checklocksignore
550553
func (rs *singleSpanRulesSampler) apply(span *Span) bool {
551554
for _, rule := range rs.rules {
552555
if rule.match(span) {

ddtrace/tracer/sampler.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ func (r *rateSampler) SetRate(rate float64) {
9191
const knuthFactor = uint64(1111111111111111111)
9292

9393
// Sample returns true if the given span should be sampled.
94+
// +checklocksignore
9495
func (r *rateSampler) Sample(s *Span) bool {
9596
if r.rate == 1 {
9697
// fast path
@@ -159,6 +160,7 @@ func (ps *prioritySampler) readRatesJSON(rc io.ReadCloser) error {
159160

160161
// getRate returns the sampling rate to be used for the given span. Callers must
161162
// guard the span.
163+
// +checklocksignore
162164
func (ps *prioritySampler) getRate(spn *Span) float64 {
163165
key := "service:" + spn.service + ",env:" + spn.meta[ext.Environment]
164166
ps.mu.RLock()
@@ -171,6 +173,7 @@ func (ps *prioritySampler) getRate(spn *Span) float64 {
171173

172174
// apply applies sampling priority to the given span. Caller must ensure it is safe
173175
// to modify the span.
176+
// +checklocksignore
174177
func (ps *prioritySampler) apply(spn *Span) {
175178
rate := ps.getRate(spn)
176179
if sampledByRate(spn.traceID, rate) {

ddtrace/tracer/span.go

Lines changed: 63 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
//msgp:ignore inheritedData
77
//go:generate go run github.com/tinylib/msgp -unexported -marshal=false -o=span_msgp.go -tests=false
8+
//go:generate go run ../../scripts/msgp_checklocks_ignore.go -type Span -file span_msgp.go
89

910
package tracer
1011

@@ -70,6 +71,7 @@ type errorConfig struct {
7071
// properties set at any time during normal operation! This is used for testing only,
7172
// and should not be used in non-test code, or you may run into performance or other
7273
// issues.
74+
// +checklocksignore
7375
func (s *Span) AsMap() map[string]interface{} {
7476
m := make(map[string]interface{})
7577
if s == nil {
@@ -100,6 +102,7 @@ func (s *Span) AsMap() map[string]interface{} {
100102
return m
101103
}
102104

105+
// +checklocksignore
103106
func (s *Span) spanEventsAsJSONString() string {
104107
if !s.supportsEvents {
105108
return s.meta["events"]
@@ -118,34 +121,55 @@ func (s *Span) spanEventsAsJSONString() string {
118121
// Span represents a computation. Callers must call Finish when a Span is
119122
// complete to ensure it's submitted.
120123
type Span struct {
121-
mu locking.RWMutex `msg:"-"` // all fields are protected by this RWMutex
122-
123-
name string `msg:"name"` // operation name
124-
service string `msg:"service"` // service name (i.e. "grpc.server", "http.request")
125-
resource string `msg:"resource"` // resource name (i.e. "/user?id=123", "SELECT * FROM users")
126-
spanType string `msg:"type"` // protocol associated with the span (i.e. "web", "db", "cache")
127-
start int64 `msg:"start"` // span start time expressed in nanoseconds since epoch
128-
duration int64 `msg:"duration"` // duration of the span expressed in nanoseconds
129-
meta map[string]string `msg:"meta,omitempty"` // arbitrary map of metadata
130-
metaStruct metaStructMap `msg:"meta_struct,omitempty"` // arbitrary map of metadata with structured values
131-
metrics map[string]float64 `msg:"metrics,omitempty"` // arbitrary map of numeric metrics
132-
spanID uint64 `msg:"span_id"` // identifier of this span
133-
traceID uint64 `msg:"trace_id"` // lower 64-bits of the root span identifier
134-
parentID uint64 `msg:"parent_id"` // identifier of the span's direct parent
135-
error int32 `msg:"error"` // error status of the span; 0 means no errors
136-
spanLinks []SpanLink `msg:"span_links,omitempty"` // links to other spans
137-
spanEvents []spanEvent `msg:"span_events,omitempty"` // events produced related to this span
124+
// guards below fields
125+
mu locking.RWMutex `msg:"-"`
126+
127+
// +checklocks:mu
128+
name string `msg:"name"` // operation name
129+
// +checklocks:mu
130+
service string `msg:"service"` // service name (i.e. "grpc.server", "http.request")
131+
// +checklocks:mu
132+
resource string `msg:"resource"` // resource name (i.e. "/user?id=123", "SELECT * FROM users")
133+
// +checklocks:mu
134+
spanType string `msg:"type"` // protocol associated with the span (i.e. "web", "db", "cache")
135+
// +checklocks:mu
136+
start int64 `msg:"start"` // span start time expressed in nanoseconds since epoch
137+
// +checklocks:mu
138+
duration int64 `msg:"duration"` // duration of the span expressed in nanoseconds
139+
// +checklocks:mu
140+
meta map[string]string `msg:"meta,omitempty"` // arbitrary map of metadata
141+
// +checklocks:mu
142+
metaStruct metaStructMap `msg:"meta_struct,omitempty"` // arbitrary map of metadata with structured values
143+
// +checklocks:mu
144+
metrics map[string]float64 `msg:"metrics,omitempty"` // arbitrary map of numeric metrics
145+
// +checklocks:mu
146+
spanID uint64 `msg:"span_id"` // identifier of this span
147+
// +checklocks:mu
148+
traceID uint64 `msg:"trace_id"` // lower 64-bits of the root span identifier
149+
// +checklocks:mu
150+
parentID uint64 `msg:"parent_id"` // identifier of the span's direct parent
151+
// +checklocks:mu
152+
error int32 `msg:"error"` // error status of the span; 0 means no errors
153+
// +checklocks:mu
154+
spanLinks []SpanLink `msg:"span_links,omitempty"` // links to other spans
155+
// +checklocks:mu
156+
spanEvents []spanEvent `msg:"span_events,omitempty"` // events produced related to this span
138157

139158
goExecTraced bool `msg:"-"`
140159
noDebugStack bool `msg:"-"` // disables debug stack traces
141-
finished bool `msg:"-"` // true if the span has been submitted to a tracer. Can only be read/modified if the trace is locked.
142160
context *SpanContext `msg:"-"` // span propagation context
143-
integration string `msg:"-"` // where the span was started from, such as a specific contrib or "manual"
144161
supportsEvents bool `msg:"-"` // whether the span supports native span events or not
145162

146-
pprofCtxActive context.Context `msg:"-"` // contains pprof.WithLabel labels to tell the profiler more about this span
163+
// +checklocks:mu
164+
finished bool `msg:"-"` // true if the span has been submitted to a tracer. Can only be read/modified if the trace is locked.
165+
// +checklocks:mu
166+
integration string `msg:"-"` // where the span was started from, such as a specific contrib or "manual"
167+
// +checklocks:mu
168+
pprofCtxActive context.Context `msg:"-"` // contains pprof.WithLabel labels to tell the profiler more about this span
169+
147170
pprofCtxRestore context.Context `msg:"-"` // contains pprof.WithLabel labels of the parent span (if any) that need to be restored when this span finishes
148171

172+
// +checklocks:mu
149173
taskEnd func() // ends execution tracer (runtime/trace) task, if started
150174
}
151175

@@ -539,6 +563,8 @@ func (s *Span) StartChild(operationName string, opts ...StartSpanOption) *Span {
539563

540564
// setSamplingPriorityLocked updates the sampling priority.
541565
// It also updates the trace's sampling priority.
566+
// s.mu must be held for writing.
567+
// +checklocks:s.mu
542568
func (s *Span) setSamplingPriorityLocked(priority int, sampler samplernames.SamplerName) {
543569
assert.RWMutexLocked(&s.mu)
544570
// We don't lock spans when flushing, so we could have a data race when
@@ -555,6 +581,8 @@ func (s *Span) setSamplingPriorityLocked(priority int, sampler samplernames.Samp
555581
// If the trace is locked, the sampling priority is forced to the given value.
556582
//
557583
// This function is should only be used when applying a manual keep or drop decision.
584+
// s.mu must be held for writing.
585+
// +checklocks:s.mu
558586
func (s *Span) forceSetSamplingPriorityLocked(priority int, sampler samplernames.SamplerName) {
559587
assert.RWMutexLocked(&s.mu)
560588
// We don't lock spans when flushing, so we could have a data race when
@@ -568,7 +596,8 @@ func (s *Span) forceSetSamplingPriorityLocked(priority int, sampler samplernames
568596
}
569597

570598
// setTagErrorLocked sets the error tag. It accounts for various valid scenarios.
571-
// This method assumes the span lock is already held.
599+
// s.mu must be held for writing.
600+
// +checklocks:s.mu
572601
func (s *Span) setTagErrorLocked(value interface{}, cfg errorConfig) {
573602
assert.RWMutexLocked(&s.mu)
574603
setError := func(yes bool) {
@@ -658,12 +687,14 @@ func (s *Span) setMeta(key, v string) {
658687
}
659688

660689
// setMetaLocked sets a string tag. This method assumes the span lock is already held.
690+
// +checklocks:s.mu
661691
func (s *Span) setMetaLocked(key, v string) {
662692
assert.RWMutexLocked(&s.mu)
663693
s.setMetaInit(key, v)
664694
}
665695

666696
// setMetaInit sets a string tag without acquiring the lock and asserting the lock is held.
697+
// +checklocksignore
667698
func (s *Span) setMetaInit(key, v string) {
668699
if s.meta == nil {
669700
s.meta = make(map[string]string, 1)
@@ -684,6 +715,7 @@ func (s *Span) setMetaInit(key, v string) {
684715
}
685716

686717
// setMetaStructLocked sets structured metadata. This method assumes the span lock is already held.
718+
// +checklocks:s.mu
687719
func (s *Span) setMetaStructLocked(key string, v any) {
688720
assert.RWMutexLocked(&s.mu)
689721
if s.metaStruct == nil {
@@ -693,6 +725,7 @@ func (s *Span) setMetaStructLocked(key string, v any) {
693725
}
694726

695727
// setTagBoolLocked sets a boolean tag on the span. This method assumes the span lock is already held.
728+
// +checklocks:s.mu
696729
func (s *Span) setTagBoolLocked(key string, v bool) {
697730
assert.RWMutexLocked(&s.mu)
698731
switch key {
@@ -721,6 +754,7 @@ func (s *Span) setTagBoolLocked(key string, v bool) {
721754

722755
// setMetric sets a numeric tag during span initialization (before the span is published).
723756
// This method should only be used during span construction in spanStart and StartSpan.
757+
// +checklocksignore
724758
func (s *Span) setMetricInit(key string, v float64) {
725759
if s.metrics == nil {
726760
s.metrics = make(map[string]float64, 1)
@@ -739,6 +773,7 @@ func (s *Span) setMetric(key string, v float64) {
739773

740774
// setMetricLocked sets a numeric tag, in our case called a metric. This method
741775
// assumes the span lock is already held.
776+
// +checklocks:s.mu
742777
func (s *Span) setMetricLocked(key string, v float64) {
743778
assert.RWMutexLocked(&s.mu)
744779
if s.metrics == nil {
@@ -777,7 +812,9 @@ func (s *Span) AddLink(link SpanLink) {
777812
}
778813

779814
// serializeSpanLinksInMeta saves span links as a JSON string under `Span[meta][_dd.span_links]`.
815+
// +checklocks:s.mu
780816
func (s *Span) serializeSpanLinksInMeta() {
817+
assert.RWMutexLocked(&s.mu)
781818
if len(s.spanLinks) == 0 {
782819
return
783820
}
@@ -794,7 +831,9 @@ func (s *Span) serializeSpanLinksInMeta() {
794831

795832
// serializeSpanEvents sets the span events from the current span in the correct transport, depending on whether the
796833
// agent supports the native method or not.
834+
// +checklocks:s.mu
797835
func (s *Span) serializeSpanEvents() {
836+
assert.RWMutexLocked(&s.mu)
798837
if len(s.spanEvents) == 0 {
799838
return
800839
}
@@ -987,6 +1026,7 @@ func obfuscatedResource(o *obfuscate.Obfuscator, typ, resource string) string {
9871026

9881027
// shouldKeep reports whether the trace should be kept.
9891028
// a single span being kept implies the whole trace being kept.
1029+
// +checklocksignore
9901030
func shouldKeep(s *Span) bool {
9911031
if p, ok := s.context.SamplingPriority(); ok && p > 0 {
9921032
// positive sampling priorities stay
@@ -1004,6 +1044,7 @@ func shouldKeep(s *Span) bool {
10041044

10051045
// shouldComputeStats mentions whether this span needs to have stats computed for.
10061046
// Warning: callers must guard!
1047+
// +checklocksignore
10071048
func shouldComputeStats(s *Span) bool {
10081049
if v, ok := s.metrics[keyMeasured]; ok && v == 1 {
10091050
return true
@@ -1046,6 +1087,7 @@ func (s *Span) String() string {
10461087
}
10471088

10481089
// Format implements fmt.Formatter.
1090+
// +checklocksignore
10491091
func (s *Span) Format(f fmt.State, c rune) {
10501092
if s == nil {
10511093
fmt.Fprintf(f, "<nil>")

0 commit comments

Comments
 (0)