-
Notifications
You must be signed in to change notification settings - Fork 7k
fix(reposerver): context-aware revision lock to prevent convoy deadlock #26867
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
979fd5c
38ebbc9
5d212ff
318ef90
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,6 +1,7 @@ | ||
| package repository | ||
|
|
||
| import ( | ||
| "context" | ||
| "fmt" | ||
| "io" | ||
| "sync" | ||
|
|
@@ -17,20 +18,25 @@ type repositoryLock struct { | |
| stateByKey map[string]*repositoryState | ||
| } | ||
|
|
||
| // Lock acquires lock unless lock is already acquired with the same commit and allowConcurrent is set to true | ||
| // Lock acquires lock unless lock is already acquired with the same commit and allowConcurrent is set to true. | ||
| // The context allows callers to cancel waiting for the lock, preventing convoy deadlocks when | ||
| // goroutines for newer revisions pile up behind the current revision. | ||
| // The init callback receives `clean` parameter which indicates if repo state must be cleaned after running non-concurrent operation. | ||
| // The first init always runs with `clean` set to true because we cannot be sure about initial repo state. | ||
| func (r *repositoryLock) Lock(path string, revision string, allowConcurrent bool, init func(clean bool) (io.Closer, error)) (io.Closer, error) { | ||
| func (r *repositoryLock) Lock(ctx context.Context, path string, revision string, allowConcurrent bool, init func(clean bool) (io.Closer, error)) (io.Closer, error) { | ||
| if ctx.Err() != nil { | ||
| return nil, ctx.Err() | ||
| } | ||
| r.lock.Lock() | ||
| state, ok := r.stateByKey[path] | ||
| if !ok { | ||
| state = &repositoryState{cond: &sync.Cond{L: &sync.Mutex{}}} | ||
| state = &repositoryState{broadcast: make(chan struct{})} | ||
| r.stateByKey[path] = state | ||
| } | ||
| r.lock.Unlock() | ||
|
|
||
| closer := utilio.NewCloser(func() error { | ||
| state.cond.L.Lock() | ||
| state.mu.Lock() | ||
| notify := false | ||
| state.processCount-- | ||
| var err error | ||
|
|
@@ -40,45 +46,54 @@ func (r *repositoryLock) Lock(path string, revision string, allowConcurrent bool | |
| err = state.initCloser.Close() | ||
| } | ||
|
|
||
| state.cond.L.Unlock() | ||
| if notify { | ||
| state.cond.Broadcast() | ||
| close(state.broadcast) | ||
| state.broadcast = make(chan struct{}) | ||
| } | ||
| state.mu.Unlock() | ||
|
Comment on lines
49
to
+53
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. How about changing the code as follows to avoid the following race condition? After unlocking
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
In the PR code, all reads/updates of the state.broadcast field |
||
| if err != nil { | ||
| return fmt.Errorf("init closer failed: %w", err) | ||
| } | ||
| return nil | ||
| }) | ||
|
|
||
| for { | ||
| state.cond.L.Lock() | ||
| state.mu.Lock() | ||
| if state.revision == "" { | ||
| // no in progress operation for that repo. Go ahead. | ||
| initCloser, err := init(!state.allowConcurrent) | ||
| if err != nil { | ||
| state.cond.L.Unlock() | ||
| state.mu.Unlock() | ||
| return nil, fmt.Errorf("failed to initialize repository resources: %w", err) | ||
| } | ||
| state.initCloser = initCloser | ||
| state.revision = revision | ||
| state.processCount = 1 | ||
| state.allowConcurrent = allowConcurrent | ||
| state.cond.L.Unlock() | ||
| state.mu.Unlock() | ||
| return closer, nil | ||
| } else if state.revision == revision && state.allowConcurrent && allowConcurrent { | ||
| // same revision already processing and concurrent processing allowed. Increment process count and go ahead. | ||
| state.processCount++ | ||
| state.cond.L.Unlock() | ||
| state.mu.Unlock() | ||
| return closer, nil | ||
| } | ||
| state.cond.Wait() | ||
| ch := state.broadcast | ||
| state.mu.Unlock() | ||
|
|
||
| // wait when all in-flight processes of this revision complete and try again | ||
| state.cond.L.Unlock() | ||
| select { | ||
| case <-ch: | ||
| // broadcast received, retry | ||
| case <-ctx.Done(): | ||
| return nil, ctx.Err() | ||
| } | ||
| } | ||
| } | ||
|
|
||
| type repositoryState struct { | ||
| cond *sync.Cond | ||
| mu sync.Mutex | ||
| broadcast chan struct{} | ||
| revision string | ||
| initCloser io.Closer | ||
| processCount int | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.