Skip to content
This repository was archived by the owner on Aug 4, 2023. It is now read-only.

Commit 0073c43

Browse files
authored
fix: occasional "socket hang up" errors in APM server intake requests (#179)
Use the 'agentkeepalive' alternative to Node.js core `http.Agent` for keep-alive handling to get the `freeSocketTimeout` option, which can time out kept-alive (aka "free") sockets before they come within range of the downstream APM Server (or Lambda extension) HTTP Keep-Alive timeout. This avoids occasional ECONNRESET errors. Refs: elastic/apm-agent-nodejs#2594
1 parent 95d7622 commit 0073c43

File tree

5 files changed

+29
-4
lines changed

5 files changed

+29
-4
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,11 @@
1919
current elastic-apm-node with this version of the client would break
2020
behavior in a Lambda environment.
2121

22+
- Add the `freeSocketTimeout` option, with a default of 4000 (ms), and switch
23+
from Node.js's core `http.Agent` to the [agentkeepalive package](https://github.com/node-modules/agentkeepalive)
24+
to fix ECONNRESET issues with HTTP Keep-Alive usage talking to APM Server
25+
(https://github.com/elastic/apm-agent-nodejs/issues/2594).
26+
2227
## v10.4.0
2328

2429
- Add APM Server version checking to the client. On creation the client will

README.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,15 @@ HTTP client configuration:
104104
`Infinity`)
105105
- `maxFreeSockets` - Maximum number of sockets to leave open in a free
106106
state. Only relevant if `keepAlive` is set to `true` (default: `256`)
107+
- `freeSocketTimeout` - A number of milliseconds of inactivity on a free
108+
(kept-alive) socket after which to time out and recycle the socket. Set this to
109+
a value less than the HTTP Keep-Alive timeout of the APM server to avoid
110+
[ECONNRESET exceptions](https://medium.com/ssense-tech/reduce-networking-errors-in-nodejs-23b4eb9f2d83).
111+
This defaults to 4000ms to be less than the [Node.js HTTP server default of
112+
5s](https://nodejs.org/api/http.html#serverkeepalivetimeout) (useful when
113+
using a Node.js-based mock APM server) and the [Go Dialer `KeepAlive`
114+
default of 15s](https://pkg.go.dev/net#Dialer) (when talking to the Elastic
115+
APM Lambda extension). (default: `4000`)
107116

108117
Cloud & Extra Metadata Configuration:
109118

index.js

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ const { performance } = require('perf_hooks')
1111
const { URL } = require('url')
1212
const zlib = require('zlib')
1313

14+
const HttpAgentKeepAlive = require('agentkeepalive')
15+
const HttpsAgentKeepAlive = HttpAgentKeepAlive.HttpsAgent
1416
const Filters = require('object-filter-sequence')
1517
const querystring = require('querystring')
1618
const Writable = require('readable-stream').Writable
@@ -244,6 +246,10 @@ Client.prototype.config = function (opts) {
244246
if (!this._conf.intakeResTimeoutOnEnd) this._conf.intakeResTimeoutOnEnd = 1000
245247
this._conf.keepAlive = this._conf.keepAlive !== false
246248
this._conf.centralConfig = this._conf.centralConfig || false
249+
if (!('keepAliveMsecs' in this._conf)) this._conf.keepAliveMsecs = 1000
250+
if (!('maxSockets' in this._conf)) this._conf.maxSockets = Infinity
251+
if (!('maxFreeSockets' in this._conf)) this._conf.maxFreeSockets = 256
252+
if (!('freeSocketTimeout' in this._conf)) this._conf.freeSocketTimeout = 4000
247253

248254
// processed values
249255
this._conf.serverUrl = new URL(this._conf.serverUrl)
@@ -260,16 +266,19 @@ Client.prototype.config = function (opts) {
260266
}
261267
}
262268

269+
let AgentKeepAlive
263270
switch (this._conf.serverUrl.protocol) {
264271
case 'http:':
265272
this._transport = http
266273
this._transportRequest = httpRequest
267274
this._transportGet = httpGet
275+
AgentKeepAlive = HttpAgentKeepAlive
268276
break
269277
case 'https:':
270278
this._transport = https
271279
this._transportRequest = httpsRequest
272280
this._transportGet = httpsGet
281+
AgentKeepAlive = HttpsAgentKeepAlive
273282
break
274283
default:
275284
throw new Error('Unknown protocol ' + this._conf.serverUrl.protocol)
@@ -281,13 +290,14 @@ Client.prototype.config = function (opts) {
281290
if (this._agent) {
282291
this._agent.destroy()
283292
}
284-
var agentOpts = {
293+
this._agent = new AgentKeepAlive({
285294
keepAlive: this._conf.keepAlive,
286295
keepAliveMsecs: this._conf.keepAliveMsecs,
296+
freeSocketTimeout: this._conf.freeSocketTimeout,
297+
timeout: this._conf.serverTimeout,
287298
maxSockets: this._conf.maxSockets,
288299
maxFreeSockets: this._conf.maxFreeSockets
289-
}
290-
this._agent = new this._transport.Agent(agentOpts)
300+
})
291301
}
292302

293303
// http request options

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
"author": "Thomas Watson <[email protected]> (https://twitter.com/wa7son)",
1919
"license": "MIT",
2020
"dependencies": {
21+
"agentkeepalive": "^4.2.1",
2122
"breadth-filter": "^2.0.0",
2223
"container-info": "^1.0.1",
2324
"end-of-stream": "^1.4.4",

test/edge-cases.test.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -347,7 +347,7 @@ test('socket timeout - server response too slow', function (t) {
347347
t.ok(err, 'got a request-error from the client')
348348
const end = Date.now()
349349
const delta = end - start
350-
t.ok(delta > 1000 && delta < 2000, 'timeout should occur between 1-2 seconds')
350+
t.ok(delta > 1000 && delta < 2000, `timeout should occur between 1-2 seconds: delta=${delta}ms`)
351351
t.equal(err.message, 'APM Server response timeout (1000ms)')
352352
server.close()
353353
t.end()

0 commit comments

Comments
 (0)