Skip to content

Commit 845cf1c

Browse files
committed
fix(agent): poll Windows service state instead of fixed sleep during updates
`sc stop` and `sc start` are asynchronous on Windows: they return immediately, without waiting for the service to actually change state. Replace the fixed sleeps (10s after stopping the agent during an update, 5s during rollback) with active polling (500ms interval, 60s timeout), so that the service is confirmed stopped before binaries are replaced and confirmed running before the health check.
1 parent da9520b commit 845cf1c

File tree

3 files changed

+48
-17
lines changed

3 files changed

+48
-17
lines changed

agent/updater/updates/update.go

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -106,8 +106,6 @@ func runUpdateProcess(basePath string) error {
106106
return fmt.Errorf("error stopping agent: %v", err)
107107
}
108108

109-
time.Sleep(10 * time.Second)
110-
111109
// Migration: check if old naming convention exists and migrate to new naming
112110
oldBinPath := filepath.Join(basePath, oldBin)
113111
if !fs.Exists(oldBinPath) {
@@ -171,7 +169,6 @@ func rollbackAgent(currentBin, backupBin, basePath string) {
171169
logger.Info("Rolling back agent to previous version...")
172170

173171
svc.Stop(config.SERV_AGENT_NAME)
174-
time.Sleep(5 * time.Second)
175172

176173
os.Remove(filepath.Join(basePath, currentBin))
177174
os.Rename(filepath.Join(basePath, backupBin), filepath.Join(basePath, currentBin))

agent/version.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
{
2-
"version": "11.1.5",
3-
"updater_version": "1.0.4"
2+
"version": "11.1.4",
3+
"updater_version": "1.0.5"
44
}

shared/svc/svc_windows.go

Lines changed: 46 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -7,24 +7,59 @@ import (
77
"fmt"
88
"os/exec"
99
"strings"
10+
"time"
1011
)
1112

12-
// Start starts a Windows service by name.
13+
const (
14+
pollInterval = 500 * time.Millisecond
15+
stopTimeout = 60 * time.Second
16+
startTimeout = 60 * time.Second
17+
)
18+
19+
// Start starts a Windows service by name and waits until it's running.
1320
func Start(serviceName string) error {
21+
// Already running? Nothing to do
22+
if running, _ := IsActive(serviceName); running {
23+
return nil
24+
}
25+
1426
cmd := exec.Command("sc", "start", serviceName)
15-
if err := cmd.Run(); err != nil {
16-
return fmt.Errorf("failed to start service %s: %w", serviceName, err)
27+
cmd.Run() // Ignore error, we'll check actual state
28+
29+
// Poll until running or timeout
30+
deadline := time.Now().Add(startTimeout)
31+
for time.Now().Before(deadline) {
32+
if running, _ := IsActive(serviceName); running {
33+
return nil
34+
}
35+
time.Sleep(pollInterval)
1736
}
18-
return nil
37+
38+
return fmt.Errorf("timeout waiting for service %s to start", serviceName)
1939
}
2040

21-
// Stop stops a Windows service by name.
41+
// Stop stops a Windows service by name and waits until it's stopped.
2242
func Stop(serviceName string) error {
43+
// Already stopped? Nothing to do
44+
status, _ := Status(serviceName)
45+
if status == StatusStopped {
46+
return nil
47+
}
48+
2349
cmd := exec.Command("sc", "stop", serviceName)
24-
if err := cmd.Run(); err != nil {
25-
return fmt.Errorf("failed to stop service %s: %w", serviceName, err)
50+
cmd.Run() // Ignore error, we'll check actual state
51+
52+
// Poll until stopped or timeout
53+
deadline := time.Now().Add(stopTimeout)
54+
for time.Now().Before(deadline) {
55+
status, _ := Status(serviceName)
56+
if status == StatusStopped {
57+
return nil
58+
}
59+
time.Sleep(pollInterval)
2660
}
27-
return nil
61+
62+
return fmt.Errorf("timeout waiting for service %s to stop", serviceName)
2863
}
2964

3065
// Restart restarts a Windows service by stopping and starting it.
@@ -37,12 +72,11 @@ func Restart(serviceName string) error {
3772

3873
// IsActive checks if a Windows service is running.
3974
func IsActive(serviceName string) (bool, error) {
40-
cmd := exec.Command("sc", "query", serviceName)
41-
output, err := cmd.Output()
75+
status, err := Status(serviceName)
4276
if err != nil {
43-
return false, fmt.Errorf("failed to query service %s: %w", serviceName, err)
77+
return false, err
4478
}
45-
return strings.Contains(string(output), "RUNNING"), nil
79+
return status == StatusRunning, nil
4680
}
4781

4882
// Status returns the status of a Windows service.

0 commit comments

Comments
 (0)