@@ -2,18 +2,20 @@ package cronjob
22
33import (
44 "bytes"
5+ "strings"
6+ "sync"
7+ "time"
8+
59 "github.com/swiftwave-org/swiftwave/ssh_toolkit"
610 "github.com/swiftwave-org/swiftwave/swiftwave_service/core"
711 "github.com/swiftwave-org/swiftwave/swiftwave_service/logger"
8- "strings"
9- "time"
1012)
1113
1214func (m Manager ) MonitorServerStatus () {
1315 logger .CronJobLogger .Println ("Starting server status monitor [cronjob]" )
1416 for {
1517 m .monitorServerStatus ()
16- time .Sleep (1 * time .Minute )
18+ time .Sleep (2 * time .Second )
1719 }
1820}
1921
@@ -29,43 +31,64 @@ func (m Manager) monitorServerStatus() {
2931 if len (servers ) == 0 {
3032 logger .CronJobLogger .Println ("Skipping ! No server found" )
3133 return
34+ }
35+
36+ var wg sync.WaitGroup
37+ for _ , server := range servers {
38+ if server .Status == core .ServerNeedsSetup || server .Status == core .ServerPreparing {
39+ continue
40+ }
41+ wg .Add (1 )
42+ go func (server core.Server ) {
43+ defer wg .Done ()
44+ m .checkAndUpdateServerStatus (server )
45+ }(server )
46+ }
47+ wg .Wait ()
48+ }
49+
50+ func (m Manager ) checkAndUpdateServerStatus (server core.Server ) {
51+ if m .isServerOnline (server ) {
52+ if server .Status != core .ServerOnline {
53+ err := core .MarkServerAsOnline (& m .ServiceManager .DbClient , & server )
54+ if err != nil {
55+ logger .CronJobLoggerError .Println ("DB Error : Failed to mark server as online >" , server .HostName , err )
56+ } else {
57+ logger .CronJobLogger .Println ("Server marked as online >" , server .HostName )
58+ }
59+ }
3260 } else {
33- for _ , server := range servers {
34- if server .Status == core .ServerNeedsSetup || server .Status == core .ServerPreparing {
35- continue
61+ if server .Status != core .ServerOffline {
62+ err := core .MarkServerAsOffline (& m .ServiceManager .DbClient , & server )
63+ if err != nil {
64+ logger .CronJobLoggerError .Println ("DB Error : Failed to mark server as offline >" , server .HostName , err )
65+ } else {
66+ logger .CronJobLogger .Println ("Server marked as offline >" , server .HostName )
3667 }
37- go func (server core.Server ) {
38- if server .Status == core .ServerOffline {
39- ssh_toolkit .DeleteSSHClient (server .HostName )
40- }
41- if m .isServerOnline (server ) {
42- err = core .MarkServerAsOnline (& m .ServiceManager .DbClient , & server )
43- if err != nil {
44- logger .CronJobLoggerError .Println ("DB Error : Failed to mark server as online > " , server .HostName )
45- } else {
46- logger .CronJobLogger .Println ("Server marked as online > " , server .HostName )
47- }
48- } else {
49- err = core .MarkServerAsOffline (& m .ServiceManager .DbClient , & server )
50- if err != nil {
51- logger .CronJobLoggerError .Println ("DB Error : Failed to mark server as offline > " , server .HostName )
52- } else {
53- logger .CronJobLogger .Println ("Server marked as offline > " , server .HostName )
54- }
55- }
56- }(server )
68+ } else {
69+ logger .CronJobLogger .Println ("Server already offline >" , server .HostName )
5770 }
5871 }
5972}
6073
6174func (m Manager ) isServerOnline (server core.Server ) bool {
75+ retries := 3 // try for 3 times before giving up
76+ if server .Status == core .ServerOffline {
77+ /**
78+ * If server is offline, try only once
79+ * Else, it will take total 30 seconds (3 retries * 10 seconds of default SSH timeout)
80+ */
81+ retries = 1
82+ }
6283 // try for 3 times
63- for i := 0 ; i < 3 ; i ++ {
84+ for i := 0 ; i < retries ; i ++ {
6485 cmd := "echo ok"
6586 stdoutBuf := new (bytes.Buffer )
6687 stderrBuf := new (bytes.Buffer )
67- err := ssh_toolkit .ExecCommandOverSSH (cmd , stdoutBuf , stderrBuf , 3 , server .IP , server .SSHPort , server .User , m .Config .SystemConfig .SshPrivateKey )
88+ err := ssh_toolkit .ExecCommandOverSSHWithOptions (cmd , stdoutBuf , stderrBuf , 3 , server .IP , server .SSHPort , server .User , m .Config .SystemConfig .SshPrivateKey , false )
6889 if err != nil {
90+ logger .CronJobLoggerError .Println ("Error while checking if server is online" , server .HostName , err .Error ())
91+ time .Sleep (1 * time .Second )
6992 continue
7093 }
7194 if strings .Compare (strings .TrimSpace (stdoutBuf .String ()), "ok" ) == 0 {
0 commit comments