diff --git a/README.md b/README.md index acc0a1f1..94c90725 100644 --- a/README.md +++ b/README.md @@ -70,6 +70,15 @@ $ linkinator LOCATIONS [ --arguments ] Automatically retry requests that return HTTP 429 responses and include a 'retry-after' header. Defaults to false. + --retry-errors + Automatically retry requests that return a 5xx status or fail without a response (e.g. a network error). Defaults to false. + + --retry-errors-count + How many times an error should be retried. Defaults to 3. + + --retry-errors-jitter + Delay (jitter), in milliseconds, applied before an error is retried. Defaults to 3000. + --server-root When scanning a locally directory, customize the location on disk where the server is started. Defaults to the path passed in [LOCATION]. diff --git a/src/cli.ts b/src/cli.ts index 7e15e2bf..a60575ff 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -61,6 +61,15 @@ const cli = meow( Automatically retry requests that return HTTP 429 responses and include a 'retry-after' header. Defaults to false. + --retry-errors, + Automatically retry requests that return 5xx or unknown response. + + --retry-errors-count, + How many times should an error be retried? + + --retry-errors-jitter, + Random jitter applied to error retry. + --server-root When scanning a locally directory, customize the location on disk where the server is started. Defaults to the path passed in [LOCATION].
@@ -102,6 +111,9 @@ const cli = meow( verbosity: {type: 'string'}, directoryListing: {type: 'boolean'}, retry: {type: 'boolean'}, + retryErrors: {type: 'boolean'}, + retryErrorsCount: {type: 'number', default: 3}, + retryErrorsJitter: {type: 'number', default: 3000}, urlRewriteSearch: {type: 'string'}, urlReWriteReplace: {type: 'string'}, }, @@ -163,6 +175,9 @@ async function main() { serverRoot: flags.serverRoot, directoryListing: flags.directoryListing, retry: flags.retry, + retryErrors: flags.retryErrors, + retryErrorsCount: Number(flags.retryErrorsCount), + retryErrorsJitter: Number(flags.retryErrorsJitter), }; if (flags.skip) { if (typeof flags.skip === 'string') { diff --git a/src/config.ts b/src/config.ts index 60a8f619..eb235667 100644 --- a/src/config.ts +++ b/src/config.ts @@ -16,6 +16,9 @@ export interface Flags { serverRoot?: string; directoryListing?: boolean; retry?: boolean; + retryErrors?: boolean; + retryErrorsCount?: number; + retryErrorsJitter?: number; urlRewriteSearch?: string; urlRewriteReplace?: string; } diff --git a/src/index.ts b/src/index.ts index fe1b77b3..7f19c6c5 100644 --- a/src/index.ts +++ b/src/index.ts @@ -46,10 +46,14 @@ interface CrawlOptions { results: LinkResult[]; cache: Set; delayCache: Map; + retryErrorsCache: Map; checkOptions: CheckOptions; queue: Queue; rootPath: string; retry: boolean; + retryErrors: boolean; + retryErrorsCount: number; + retryErrorsJitter: number; } // Spoof a normal looking User-Agent to keep the servers happy @@ -113,6 +117,7 @@ export class LinkChecker extends EventEmitter { const results = new Array(); const initCache: Set = new Set(); const delayCache: Map = new Map(); + const retryErrorsCache: Map = new Map(); for (const path of options.path) { const url = new URL(path); @@ -125,9 +130,13 @@ export class LinkChecker extends EventEmitter { results, cache: initCache, delayCache, + retryErrorsCache, queue, rootPath: path, retry: !!opts.retry, + retryErrors: !!opts.retryErrors, + retryErrorsCount: 
opts.retryErrorsCount ?? 3, + retryErrorsJitter: opts.retryErrorsJitter ?? 3000, }); }); } @@ -293,6 +302,12 @@ export class LinkChecker extends EventEmitter { shouldRecurse = isHtml(res); } + // If retryErrors is enabled, retry 5xx and 0 status (which indicates + // a network error likely occurred): + if (this.shouldRetryOnError(status, opts)) { + return; + } + // Assume any 2xx status is 👌 if (status >= 200 && status < 300) { state = LinkState.OK; @@ -354,12 +369,16 @@ export class LinkChecker extends EventEmitter { crawl, cache: opts.cache, delayCache: opts.delayCache, + retryErrorsCache: opts.retryErrorsCache, results: opts.results, checkOptions: opts.checkOptions, queue: opts.queue, parent: opts.url.href, rootPath: opts.rootPath, retry: opts.retry, + retryErrors: opts.retryErrors, + retryErrorsCount: opts.retryErrorsCount, + retryErrorsJitter: opts.retryErrorsJitter, }); }); } @@ -404,7 +423,6 @@ export class LinkChecker extends EventEmitter { } else { opts.delayCache.set(opts.url.host, retryAfter); } - opts.queue.add( async () => { await this.crawl(opts); @@ -421,6 +439,50 @@ export class LinkChecker extends EventEmitter { this.emit('retry', retryDetails); return true; } + /** + * If the response is a 5xx or synthetic 0 response retry N times. + * @param status Status returned by request or 0 if request threw. 
+ * @param opts CrawlOptions used during this request + * @returns true when a retry was queued and the caller should stop processing this response; false otherwise. + */ + shouldRetryOnError(status: number, opts: CrawlOptions): boolean { + const maxRetries = opts.retryErrorsCount; + const retryAfter = opts.retryErrorsJitter; + + if (!opts.retryErrors) { + return false; + } + + // Only retry status 0 (request threw) and status codes >= 500: + if (status > 0 && status < 500) { + return false; + } + + // Count retries per URL so one failing link cannot exhaust the retry + // budget of every other link on the same host. + const currentRetries = opts.retryErrorsCache.get(opts.url.href) ?? 0; + // Give up after `retryErrorsCount` retries (0 disables error retries). + if (currentRetries >= maxRetries) { + return false; + } + opts.retryErrorsCache.set(opts.url.href, currentRetries + 1); + + opts.queue.add( + async () => { + await this.crawl(opts); + }, + { + delay: retryAfter, + } + ); + const retryDetails: RetryInfo = { + url: opts.url.href, + status: status, + secondsUntilRetry: Math.round(retryAfter / 1000), + }; + this.emit('retry', retryDetails); + return true; + } } /** diff --git a/src/options.ts b/src/options.ts index 00c7f880..b0841b9d 100644 --- a/src/options.ts +++ b/src/options.ts @@ -22,6 +22,9 @@ export interface CheckOptions { serverRoot?: string; directoryListing?: boolean; retry?: boolean; + retryErrors?: boolean; + retryErrorsCount?: number; + retryErrorsJitter?: number; urlRewriteExpressions?: UrlRewriteExpression[]; } @@ -145,5 +148,6 @@ export async function processOptions( options.syntheticServerRoot = options.serverRoot; } } + return options; } diff --git a/test/test.retry.ts b/test/test.retry.ts index 061ddf4e..347dfa59 100644 --- a/test/test.retry.ts +++ b/test/test.retry.ts @@ -210,4 +210,52 @@ describe('retries', () => { }); return {promise, resolve, reject}; } + + describe('retry-errors', () => { + it('should retry 5xx status code', async () => { + const scope = nock('http://fake.local') + .get('/') + .reply(522) + .get('/') + .reply(200); + + const {promise,
resolve} = invertedPromise(); + const checker = new LinkChecker().on('retry', resolve); + const clock = sinon.useFakeTimers({ + shouldAdvanceTime: true, + }); + const checkPromise = checker.check({ + path: 'test/fixtures/basic', + retryErrors: true, + }); + await promise; + await clock.tickAsync(5000); + const results = await checkPromise; + assert.ok(results.passed); + scope.done(); + }); + + it('should retry 0 status code', async () => { + const scope = nock('http://fake.local') + .get('/') + .replyWithError({code: 'ETIMEDOUT'}) + .get('/') + .reply(200); + + const {promise, resolve} = invertedPromise(); + const checker = new LinkChecker().on('retry', resolve); + const clock = sinon.useFakeTimers({ + shouldAdvanceTime: true, + }); + const checkPromise = checker.check({ + path: 'test/fixtures/basic', + retryErrors: true, + }); + await promise; + await clock.tickAsync(5000); + const results = await checkPromise; + assert.ok(results.passed); + scope.done(); + }); + }); });