@@ -7,14 +7,17 @@ import { isAbsolute } from 'path';
77import { getURLOrigin } from 'get-url-origin' ;
88import pMemoize from 'p-memoize' ;
99import pAll from 'p-all' ;
10+ import * as http from 'http' ;
11+ import * as https from 'https' ;
1012
/**
 * Baseline rule options. Values passed by the user are merged on top of
 * these, so every key listed here always has a value.
 */
const DEFAULT_OPTIONS = {
  // {boolean} `false` disables the checks for relative URIs.
  checkRelative: true,
  // {String|null} a base URI to resolve relative URIs.
  baseURI: null,
  // {Array<String>} URIs to be skipped from availability checks.
  ignore: [],
  // {Array<String>} origins to prefer GET over HEAD.
  preferGET: [],
  // {number} Concurrency count of linting link
  concurrency: 2,
  // {number} Max retry count
  retry: 3,
  // {boolean} if it is true, use keepAlive for checking request [experimental]
  keepAlive: true,
};
1922
2023// Adopted from http://stackoverflow.com/a/3809435/951517
@@ -82,89 +85,119 @@ function waitTimeMs(ms) {
8285}
8386
/**
 * Create an `isAliveURI` function bound to the given options.
 *
 * The returned checker owns its own pair of keep-alive agents (one for
 * `http:`, one for `https:`) instead of relying on the global agents; the
 * agents are only handed to `fetch` when `options.keepAlive` is truthy.
 *
 * @param {object} [options]
 * @param {boolean} [options.keepAlive] reuse connections through the
 *   checker's own keep-alive agents when truthy
 * @returns {function(string, string=, number=, number=): Promise<{ ok: boolean, message: string, redirected?: boolean, redirectTo?: string }>}
 */
const createCheckAliveURL = (options = {}) => {
  // Dedicated agents for this checker, kept separate from the global
  // http(s) agents (the original change did this to avoid "socket hang up").
  const keepAliveAgents = {
    http: new http.Agent({ keepAlive: true }),
    https: new https.Agent({ keepAlive: true }),
  };

  /**
   * Select the agent for one request. Passed to `fetch` as the `agent`
   * option, which calls it with the parsed request URL.
   *
   * @param {{ protocol: string }} parsedURL
   * @returns {module:http.Agent|module:https.Agent|null} `null` when
   *   keep-alive is disabled; the http agent for `http:`; otherwise the
   *   https agent
   */
  const getAgent = (parsedURL) => {
    if (!options.keepAlive) {
      return null;
    }
    return parsedURL.protocol === 'http:' ? keepAliveAgents.http : keepAliveAgents.https;
  };

  /**
   * Checks if a given URI is alive or not.
   *
   * Normally, this method uses the following retry strategy:
   *
   * 1. HEAD
   * 2. GET
   * 3. GET
   *
   * Only failed requests are retried: a response whose `ok` flag is set is
   * reported immediately.
   *
   * @param {string} uri
   * @param {string} method
   * @param {number} maxRetryCount
   * @param {number} currentRetryCount
   * @return {Promise<{ ok: boolean, message: string, redirected?: boolean, redirectTo?: string }>}
   */
  return async function isAliveURI(uri, method = 'HEAD', maxRetryCount = 3, currentRetryCount = 0) {
    // Parse once: `host` feeds the Host header, `hash` is re-attached to the
    // redirect target below.
    const { host, hash } = URL.parse(uri);
    const canRetry = currentRetryCount < maxRetryCount;
    const retryWithGET = () => isAliveURI(uri, 'GET', maxRetryCount, currentRetryCount + 1);

    const opts = {
      method,
      // Disable gzip compression in Node.js
      // to avoid the zlib's "unexpected end of file" error
      // https://github.com/request/request/issues/2045
      compress: false,
      // Some website require UserAgent and Accept header
      // to avoid ECONNRESET error
      // https://github.com/textlint-rule/textlint-rule-no-dead-link/issues/111
      headers: {
        'User-Agent': 'textlint-rule-no-dead-link/1.0',
        'Accept': '*/*',
        // Same host for target url
        // https://github.com/textlint-rule/textlint-rule-no-dead-link/issues/111
        'Host': host,
      },
      // Use `manual` redirect behaviour to get HTTP redirect status code
      // and see what kind of redirect is occurring
      redirect: 'manual',
      // custom http(s).agent
      agent: getAgent,
    };

    try {
      const res = await fetch(uri, opts);

      if (isRedirect(res.status)) {
        // Fetch again, this time following the redirect chain, to learn the
        // final location and whether it is reachable.
        const finalRes = await fetch(
          uri,
          Object.assign({}, opts, { redirect: 'follow' }),
        );

        return {
          ok: finalRes.ok,
          redirected: true,
          redirectTo: hash !== null ? `${finalRes.url}${hash}` : finalRes.url,
          message: `${res.status} ${res.statusText}`,
        };
      }

      // A successful response, or an exhausted retry budget, is final.
      // Retrying successful responses would only multiply requests and delay.
      if (res.ok || !canRetry) {
        return {
          ok: res.ok,
          message: `${res.status} ${res.statusText}`,
        };
      }

      // A failed HEAD is retried as GET right away; a failed GET backs off
      // first. The delay grows quadratically with the attempt number:
      // 0ms -> 100ms -> 400ms -> 900ms ...
      if (method !== 'HEAD') {
        await waitTimeMs((currentRetryCount ** 2) * 100);
      }
      return retryWithGET();
    } catch (ex) {
      // Retry with `GET` method if the request failed
      // as some servers don't accept `HEAD` requests but are OK with `GET` requests.
      // https://github.com/textlint-rule/textlint-rule-no-dead-link/pull/86
      if (method === 'HEAD' && canRetry) {
        return retryWithGET();
      }

      return {
        ok: false,
        message: ex.message,
      };
    }
  };
};
168201
169202/**
170203 * Check if a given file exists
@@ -188,7 +221,8 @@ function reporter(context, options = {}) {
188221 const { Syntax, getSource, report, RuleError, fixer, getFilePath } = context ;
189222 const helper = new RuleHelper ( context ) ;
190223 const opts = Object . assign ( { } , DEFAULT_OPTIONS , options ) ;
191- // 30sec cache
224+ const isAliveURI = createCheckAliveURL ( opts ) ;
225+ // 30sec memorized
192226 const memorizedIsAliveURI = pMemoize ( isAliveURI , {
193227 maxAge : 30 * 1000 ,
194228 } ) ;
0 commit comments