|
1 | 1 | package rule |
2 | 2 |
|
3 | 3 | import ( |
4 | | - "context" |
5 | 4 | "errors" |
6 | 5 | "fmt" |
7 | | - "math" |
8 | | - "math/rand/v2" |
9 | | - "strings" |
10 | | - "time" |
11 | 6 |
|
12 | 7 | "github.com/lf-edge/ekuiper/v2/internal/pkg/def" |
13 | 8 | "github.com/lf-edge/ekuiper/v2/internal/topo" |
@@ -100,18 +95,13 @@ func (s *State) doStart() error { |
100 | 95 | s.topoGraph = s.topology.GetTopo() |
101 | 96 | } |
102 | 97 | } |
103 | | - ctx, cancel := context.WithCancel(context.Background()) |
104 | | - s.cancelRetry = cancel |
105 | | - go s.runTopo(ctx, s.topology, s.Rule.Options.RestartStrategy) |
| 98 | + go s.runTopo(s.topology) |
106 | 99 | return nil |
107 | 100 | }) |
108 | 101 | return err |
109 | 102 | } |
110 | 103 |
|
111 | 104 | func (s *State) doStop() error { |
112 | | - if s.cancelRetry != nil { |
113 | | - s.cancelRetry() |
114 | | - } |
115 | 105 | if s.topology != nil { |
116 | 106 | e := s.topology.GetContext().Err() |
117 | 107 | s.topoGraph = s.topology.GetTopo() |
@@ -144,91 +134,45 @@ func (s *State) stopOld() { |
144 | 134 | return |
145 | 135 | } |
146 | 136 |
|
147 | | -// This is called async |
148 | | -func (s *State) runTopo(ctx context.Context, tp *topo.Topo, rs *def.RestartStrategy) { |
149 | | - err := infra.SafeRun(func() error { |
150 | | - count := 0 |
151 | | - d := time.Duration(rs.Delay) |
152 | | - var er error |
153 | | - ticker := time.NewTicker(d) |
154 | | - defer ticker.Stop() |
155 | | - for { |
156 | | - select { |
157 | | - case e := <-tp.Open(): |
158 | | - er = e |
159 | | - if errorx.IsUnexpectedErr(er) { // Only restart Rule for errors |
160 | | - tp.GetContext().SetError(er) |
161 | | - s.logger.Errorf("closing Rule for error: %v", er) |
162 | | - tp.Cancel() |
163 | | - s.transitState(machine.Stopped, "retrying after error: "+er.Error()) |
164 | | - } else { |
165 | | - // exit normally |
166 | | - lastWill := "cancelled manually" |
167 | | - if errorx.IsEOF(er) { |
168 | | - lastWill = EOFMessage |
169 | | - msg := er.Error() |
170 | | - if len(msg) > 0 { |
171 | | - lastWill = fmt.Sprintf("%s: %s", lastWill, msg) |
172 | | - } |
173 | | - s.updateTrigger(s.Rule.Id, false) |
174 | | - } |
175 | | - tp.Cancel() |
176 | | - s.transitState(machine.Stopped, lastWill) |
177 | | - return nil |
178 | | - } |
179 | | - } |
180 | | - if count < rs.Attempts { |
181 | | - if d > time.Duration(rs.MaxDelay) { |
182 | | - d = time.Duration(rs.MaxDelay) |
183 | | - } |
184 | | - if rs.JitterFactor > 0 { |
185 | | - d = time.Duration(math.Round(float64(d.Milliseconds())*((rand.Float64()*2-1)*rs.JitterFactor+1))) * time.Millisecond |
186 | | - // make sure d is always in range |
187 | | - for d <= 0 || d > time.Duration(rs.MaxDelay) { |
188 | | - d = time.Duration(math.Round(float64(d.Milliseconds())*((rand.Float64()*2-1)*rs.JitterFactor+1))) * time.Millisecond |
189 | | - } |
190 | | - s.logger.Infof("Rule will restart with jitterred delay %d", d) |
191 | | - } else { |
192 | | - s.logger.Infof("Rule will restart with delay %d", d) |
193 | | - } |
194 | | - // retry after delay |
195 | | - select { |
196 | | - case <-ticker.C: |
197 | | - break |
198 | | - case <-ctx.Done(): |
199 | | - s.logger.Errorf("stop Rule retry as cancelled") |
200 | | - return nil |
201 | | - } |
202 | | - count++ |
203 | | - if rs.Multiplier > 0 { |
204 | | - d = time.Duration(rs.Delay) * time.Duration(math.Pow(rs.Multiplier, float64(count))) |
205 | | - } |
206 | | - } else { |
207 | | - return er |
| 137 | +// runTopo waits for the topology to report its exit on tp.Open(), cancels it, and hands the outcome to cleanRule. It is started with `go` from doStart, so do not touch state properties here (NOTE(review): s.Rule.Id and s.updateTrigger are still used below - confirm that is safe without ruleLock) |
| 138 | +func (s *State) runTopo(tp *topo.Topo) { |
| 139 | + e := <-tp.Open() |
| 140 | + tp.Cancel() |
| 141 | + var lastWill string |
| 142 | + hasError := false |
| 143 | + if errorx.IsUnexpectedErr(e) { // unexpected error: store it on the topology context and flag the exit as an error; no retry is attempted in this function |
| 144 | + tp.GetContext().SetError(e) |
| 145 | + lastWill = e.Error() |
| 146 | + hasError = true |
| 147 | + } else { |
| 148 | + // normal exit: the message defaults to a manual cancel; an EOF overrides it below and also updates the trigger |
| 149 | + lastWill = "canceled manually" |
| 150 | + if errorx.IsEOF(e) { |
| 151 | + lastWill = EOFMessage |
| 152 | + msg := e.Error() |
| 153 | + if len(msg) > 0 { |
| 154 | + lastWill = fmt.Sprintf("%s: %s", lastWill, msg) |
208 | 155 | } |
| 156 | + s.updateTrigger(s.Rule.Id, false) |
209 | 157 | } |
210 | | - }) |
211 | | - s.cleanRule(err) |
| 158 | + } |
| 159 | + s.cleanRule(hasError, lastWill) |
212 | 160 | } |
213 | 161 |
|
214 | | -func (s *State) cleanRule(err error) { |
| 162 | +func (s *State) cleanRule(hasError bool, lastWill string) { | // cleanRule snapshots the topo graph and metrics under ruleLock, drops s.topology, and transits to StoppedByErr (hasError) or Stopped, passing lastWill as the message
215 | 163 | s.ruleLock.Lock() |
216 | 164 | defer s.ruleLock.Unlock() |
217 | 165 | if s.topology != nil { |
218 | 166 | s.topoGraph = s.topology.GetTopo() |
219 | 167 | keys, values := s.topology.GetMetrics() |
220 | 168 | s.stoppedMetrics = []any{keys, values} |
221 | 169 | } |
222 | | - if err != nil { // Exit after retries |
223 | | - s.logger.Error(err) |
224 | | - s.transitState(machine.StoppedByErr, err.Error()) |
225 | | - s.topology = nil |
| 170 | + s.topology = nil | // now cleared on every exit path, before the state transition
| 171 | + if hasError { |
| 172 | + s.transitState(machine.StoppedByErr, lastWill) |
226 | 173 | s.logger.Infof("%s exit by error set tp to nil", s.Rule.Id) |
227 | | - } else if strings.HasPrefix(s.sm.LastWill(), EOFMessage) { |
228 | | - // Two case when err is nil; 1. Manually stop 2.EOF |
229 | | - // Only transit status when EOF. Don't do this for manual stop because the state already changed! |
230 | | - s.transitState(machine.Stopped, "") |
231 | | - s.topology = nil |
| 174 | + } else { |
| 175 | + s.transitState(machine.Stopped, lastWill) |
232 | 176 | s.logger.Infof("%s exit eof set tp to nil", s.Rule.Id) | // NOTE(review): this branch now runs for manual cancel as well as EOF - confirm the "exit eof" log text is still intended
233 | 177 | } |
234 | 178 | } |
0 commit comments