Skip to content

Commit cb2f330

Browse files
fix: Stabilize websocket connection (#166)
1 parent 9af1cdd commit cb2f330

File tree

6 files changed

+53
-7
lines changed

6 files changed

+53
-7
lines changed

package.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,12 +90,12 @@
9090
"tslib": "^2.1.0",
9191
"typescript": "^5.7.2",
9292
"why-is-node-running": "^2.3.0",
93-
"isomorphic-ws": "^5.0.0"
93+
"isomorphic-ws": "^5.0.0",
94+
"@msgpack/msgpack": "^2.7.1"
9495
},
9596
"dependencies": {
9697
"@hey-api/client-fetch": "^0.7.3",
9798
"@inkjs/ui": "^2.0.0",
98-
"@msgpack/msgpack": "^3.1.0",
9999
"@opentelemetry/api": "^1.9.0",
100100
"@xterm/addon-serialize": "^0.13.0",
101101
"@xterm/headless": "^5.5.0",

src/AgentClient/AgentConnection.ts

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,8 +93,15 @@ export class AgentConnection {
9393
});
9494

9595
connection.onMissingHeartbeat(() => {
96+
// Be more conservative about disconnection - only disconnect if we have no activity
97+
// and no pending messages, indicating a truly dead connection
9698
if (this.pendingMessages.size === 0) {
97-
this.state = "DISCONNECTED";
99+
// Add a small delay to allow for network recovery before declaring disconnection
100+
setTimeout(() => {
101+
if (this.pendingMessages.size === 0 && this.state === "CONNECTED") {
102+
this.state = "DISCONNECTED";
103+
}
104+
}, 1000);
98105
}
99106
});
100107
}

src/AgentClient/WebSocketClient.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,9 @@ export class WebSocketClient extends Disposable {
235235
);
236236
}
237237

238+
// Update lastActivity on send to prevent heartbeat suppression
239+
this.lastActivity = Date.now();
240+
238241
// This is an async operation in Node, but to avoid wrapping every send in a promise, we
239242
// rely on the error listener to deal with any errors. Any unsent messages will be timed out
240243
// by our PendingMessage logic

src/AgentClient/index.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@ import { SandboxClient } from "../SandboxClient";
2525
import { InitStatus } from "../pitcher-protocol/messages/system";
2626

2727
// Timeout for detecting a pong response, leading to a forced disconnect
28-
let PONG_DETECTION_TIMEOUT = 15_000;
28+
// Increased from 15s to 30s to be more tolerant of network latency
29+
let PONG_DETECTION_TIMEOUT = 30_000;
2930

3031
// When focusing the app we do a lower timeout to more quickly detect a potential disconnect
3132
const FOCUS_PONG_DETECTION_TIMEOUT = 5_000;

src/SandboxClient/index.ts

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,8 @@ export class SandboxClient {
195195
this.attemptAutoReconnect();
196196
}
197197
} else if (state === "CONNECTED") {
198+
// Reset keep-alive failures on successful connection
199+
this.keepAliveFailures = 0;
198200
if (this.shouldKeepAlive) {
199201
this.keepActiveWhileConnected(true);
200202
}
@@ -389,6 +391,8 @@ export class SandboxClient {
389391
private keepAliveInterval: NodeJS.Timeout | null = null;
390392
private shouldKeepAlive = false;
391393
private isExplicitlyDisconnected = false;
394+
private keepAliveFailures = 0;
395+
private maxKeepAliveFailures = 3;
392396
/**
393397
* If enabled, we will keep the sandbox from hibernating as long as the SDK is connected to it.
394398
*/
@@ -399,9 +403,31 @@ export class SandboxClient {
399403
if (enabled) {
400404
if (!this.keepAliveInterval) {
401405
this.keepAliveInterval = setInterval(() => {
402-
this.agentClient.system.update().catch((error) => {
403-
console.warn("Unable to keep active while connected", error);
404-
});
406+
this.agentClient.system.update()
407+
.then(() => {
408+
// Reset failure count on success
409+
this.keepAliveFailures = 0;
410+
})
411+
.catch((error) => {
412+
this.keepAliveFailures++;
413+
console.warn(`Keep-alive failed (${this.keepAliveFailures}/${this.maxKeepAliveFailures}):`, error);
414+
415+
// If we've hit max failures, stop aggressive keep-alive to prevent connection thrashing
416+
if (this.keepAliveFailures >= this.maxKeepAliveFailures) {
417+
console.warn("Max keep-alive failures reached, reducing frequency to prevent connection issues");
418+
if (this.keepAliveInterval) {
419+
clearInterval(this.keepAliveInterval);
420+
this.keepAliveInterval = null;
421+
}
422+
// Pause keep-alive, then restart it after a cool-down (the 10s polling interval itself is unchanged)
423+
setTimeout(() => {
424+
if (this.shouldKeepAlive && !this.keepAliveInterval) {
425+
this.keepActiveWhileConnected(true);
426+
this.keepAliveFailures = 0; // Reset for retry
427+
}
428+
}, 60000); // Wait 1 minute before retrying
429+
}
430+
});
405431
}, 1000 * 10);
406432
}
407433
} else {

src/utils/api.ts

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,15 @@ export type HandledResponse<D, E> = {
6868
export function getStartOptions(opts: StartSandboxOpts | undefined) {
6969
if (!opts) return {};
7070

71+
// Warn about hibernation timeouts that are too short and may cause connection issues
72+
if (opts.hibernationTimeoutSeconds !== undefined && opts.hibernationTimeoutSeconds < 60) {
73+
console.warn(
74+
`Warning: hibernationTimeoutSeconds (${opts.hibernationTimeoutSeconds}s) is less than 60 seconds. ` +
75+
`This may cause connection instability and frequent disconnections. ` +
76+
`Consider using at least 60 seconds for stable websocket connections.`
77+
);
78+
}
79+
7180
return {
7281
ipcountry: opts.ipcountry,
7382
tier: opts.vmTier?.name,

0 commit comments

Comments
 (0)