Skip to content

Commit 0ffc067

Browse files
committed
feat(rule): support keepAlive option experimentally
1 parent 5cac5fb commit 0ffc067

File tree

2 files changed

+115
-75
lines changed

2 files changed

+115
-75
lines changed

ReadMe.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ The default options are:
6565
"ignore": [],
6666
"preferGET": [],
6767
"ignoreRedirects": false,
68+
"concurrency": 8,
6869
"retry": 3
6970
}
7071
}
@@ -133,6 +134,11 @@ Example:
133134
This rule checks for redirects (3xx status codes) and considers them an error by default.
134135
To ignore redirects during checks, set this value to `true`.
135136

137+
### concurrency
138+
139+
This rule checks links concurrently.
140+
The default concurrency count is `8`.
141+
136142
### retry
137143

138144
This rule checks the url with retry.

src/no-dead-link.js

Lines changed: 109 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,17 @@ import { isAbsolute } from 'path';
77
import { getURLOrigin } from 'get-url-origin';
88
import pMemoize from 'p-memoize';
99
import pAll from 'p-all';
10+
import * as http from 'http';
11+
import * as https from 'https';
1012

1113
const DEFAULT_OPTIONS = {
1214
checkRelative: true, // {boolean} `false` disables the checks for relative URIs.
1315
baseURI: null, // {String|null} a base URI to resolve relative URIs.
1416
ignore: [], // {Array<String>} URIs to be skipped from availability checks.
1517
preferGET: [], // {Array<String>} origins to prefer GET over HEAD.
16-
concurrency: 8, // {number} Concurrency count of linting link,
18+
concurrency: 2, // {number} Concurrency count of linting link
1719
retry: 3, // {number} Max retry count
20+
keepAlive: true, // {boolean} if it is true, use keepAlive for checking request [experimental]
1821
};
1922

2023
// Adopted from http://stackoverflow.com/a/3809435/951517
@@ -82,89 +85,119 @@ function waitTimeMs(ms) {
8285
}
8386

8487
/**
85-
* Checks if a given URI is alive or not.
86-
*
87-
* Normally, this method following strategiry about retry
88-
*
89-
* 1. Head
90-
* 2. Get
91-
* 3. Get
92-
*
93-
* @param {string} uri
94-
* @param {string} method
95-
* @param {number} maxRetryCount
96-
* @param {number} currentRetryCount
97-
* @return {{ ok: boolean, redirect?: string, message: string }}
88+
* Create isAliveURI function with options
89+
* @param {object} options
90+
* @returns {isAliveURI}
9891
*/
99-
async function isAliveURI(uri, method = 'HEAD', maxRetryCount = 3, currentRetryCount = 0) {
100-
const { host } = URL.parse(uri);
101-
const opts = {
102-
method,
103-
// Disable gzip compression in Node.js
104-
// to avoid the zlib's "unexpected end of file" error
105-
// https://github.com/request/request/issues/2045
106-
compress: false,
107-
// Some website require UserAgent and Accept header
108-
// to avoid ECONNRESET error
109-
// https://github.com/textlint-rule/textlint-rule-no-dead-link/issues/111
110-
headers: {
111-
'User-Agent': 'textlint-rule-no-dead-link/1.0',
112-
'Accept': '*/*',
113-
// Same host for target url
114-
// https://github.com/textlint-rule/textlint-rule-no-dead-link/issues/111
115-
'Host': host,
116-
},
117-
// Use `manual` redirect behaviour to get HTTP redirect status code
118-
// and see what kind of redirect is occurring
119-
redirect: 'manual',
92+
const createCheckAliveURL = (options) => {
93+
const keepAliveAgents = {
94+
http: new http.Agent({ keepAlive: true }),
95+
https: new https.Agent({ keepAlive: true }),
12096
};
121-
try {
122-
const res = await fetch(uri, opts);
97+
/**
98+
* Use agents owned by this rule instead of the global http(s) agent,
99+
* in order to avoid "socket hang up" errors.
100+
* @param parsedURL
101+
* @returns {module:http.Agent|null|module:https.Agent}
102+
*/
103+
const getAgent = (parsedURL) => {
104+
if (!options.keepAlive) {
105+
return null;
106+
}
107+
if (parsedURL.protocol === 'http:') {
108+
return keepAliveAgents.http;
109+
}
110+
return keepAliveAgents.https;
111+
};
112+
/**
113+
* Checks if a given URI is alive or not.
114+
*
115+
* Normally, this method uses the following retry strategy:
116+
*
117+
* 1. Head
118+
* 2. Get
119+
* 3. Get
120+
*
121+
* @param {string} uri
122+
* @param {string} method
123+
* @param {number} maxRetryCount
124+
* @param {number} currentRetryCount
125+
* @return {Promise<{ ok: boolean, redirected?: boolean, redirectTo?: string, message: string }>}
126+
*/
127+
return async function isAliveURI(uri, method = 'HEAD', maxRetryCount = 3, currentRetryCount = 0) {
128+
const { host } = URL.parse(uri);
129+
130+
const opts = {
131+
method,
132+
// Disable gzip compression in Node.js
133+
// to avoid the zlib's "unexpected end of file" error
134+
// https://github.com/request/request/issues/2045
135+
compress: false,
136+
// Some website require UserAgent and Accept header
137+
// to avoid ECONNRESET error
138+
// https://github.com/textlint-rule/textlint-rule-no-dead-link/issues/111
139+
headers: {
140+
'User-Agent': 'textlint-rule-no-dead-link/1.0',
141+
'Accept': '*/*',
142+
// Same host for target url
143+
// https://github.com/textlint-rule/textlint-rule-no-dead-link/issues/111
144+
'Host': host,
145+
},
146+
// Use `manual` redirect behaviour to get HTTP redirect status code
147+
// and see what kind of redirect is occurring
148+
redirect: 'manual',
149+
// custom http(s).agent
150+
agent: getAgent,
151+
};
152+
try {
153+
const res = await fetch(uri, opts);
154+
155+
if (isRedirect(res.status)) {
156+
const finalRes = await fetch(
157+
uri,
158+
Object.assign({}, opts, { redirect: 'follow' }),
159+
);
160+
161+
const { hash } = URL.parse(uri);
162+
return {
163+
ok: finalRes.ok,
164+
redirected: true,
165+
redirectTo: hash !== null ? `${finalRes.url}${hash}` : finalRes.url,
166+
message: `${res.status} ${res.statusText}`,
167+
};
168+
}
123169

124-
if (isRedirect(res.status)) {
125-
const finalRes = await fetch(
126-
uri,
127-
Object.assign({}, opts, { redirect: 'follow' }),
128-
);
170+
if (!res.ok && method === 'HEAD' && currentRetryCount < maxRetryCount) {
171+
return isAliveURI(uri, 'GET', maxRetryCount, currentRetryCount + 1);
172+
}
129173

130-
const { hash } = URL.parse(uri);
174+
// try to fetch again if not reach max retry count
175+
if (currentRetryCount < maxRetryCount) {
176+
// quadratic backoff: wait (currentRetryCount ** 2) * 100 ms before retrying
177+
// 0ms -> 100ms -> 400ms -> 900ms ...
178+
await waitTimeMs((currentRetryCount ** 2) * 100);
179+
return isAliveURI(uri, 'GET', maxRetryCount, currentRetryCount + 1);
180+
}
131181
return {
132-
ok: finalRes.ok,
133-
redirected: true,
134-
redirectTo: hash !== null ? `${finalRes.url}${hash}` : finalRes.url,
182+
ok: res.ok,
135183
message: `${res.status} ${res.statusText}`,
136184
};
137-
}
138-
139-
if (!res.ok && method === 'HEAD' && currentRetryCount < maxRetryCount) {
140-
return isAliveURI(uri, 'GET', maxRetryCount, currentRetryCount + 1);
141-
}
185+
} catch (ex) {
186+
// Retry with `GET` method if the request failed
187+
// as some servers don't accept `HEAD` requests but are OK with `GET` requests.
188+
// https://github.com/textlint-rule/textlint-rule-no-dead-link/pull/86
189+
if (method === 'HEAD' && currentRetryCount < maxRetryCount) {
190+
return isAliveURI(uri, 'GET', maxRetryCount, currentRetryCount + 1);
191+
}
142192

143-
// try to fetch again if not reach max retry count
144-
if (currentRetryCount < maxRetryCount) {
145-
// exponential retry
146-
// 0ms -> 100ms -> 200ms -> 400ms -> 800ms ...
147-
await waitTimeMs((currentRetryCount ** 2) * 100);
148-
return isAliveURI(uri, 'GET', maxRetryCount, currentRetryCount + 1);
149-
}
150-
return {
151-
ok: res.ok,
152-
message: `${res.status} ${res.statusText}`,
153-
};
154-
} catch (ex) {
155-
// Retry with `GET` method if the request failed
156-
// as some servers don't accept `HEAD` requests but are OK with `GET` requests.
157-
// https://github.com/textlint-rule/textlint-rule-no-dead-link/pull/86
158-
if (method === 'HEAD' && currentRetryCount < maxRetryCount) {
159-
return isAliveURI(uri, 'GET', maxRetryCount, currentRetryCount + 1);
193+
return {
194+
ok: false,
195+
message: ex.message,
196+
};
160197
}
198+
};
161199

162-
return {
163-
ok: false,
164-
message: ex.message,
165-
};
166-
}
167-
}
200+
};
168201

169202
/**
170203
* Check if a given file exists
@@ -188,7 +221,8 @@ function reporter(context, options = {}) {
188221
const { Syntax, getSource, report, RuleError, fixer, getFilePath } = context;
189222
const helper = new RuleHelper(context);
190223
const opts = Object.assign({}, DEFAULT_OPTIONS, options);
191-
// 30sec cache
224+
const isAliveURI = createCheckAliveURL(opts);
225+
// memoize results for 30 seconds
192226
const memorizedIsAliveURI = pMemoize(isAliveURI, {
193227
maxAge: 30 * 1000,
194228
});

0 commit comments

Comments
 (0)