diff --git a/README.md b/README.md index 3dc7f486..acc0a1f1 100644 --- a/README.md +++ b/README.md @@ -80,6 +80,12 @@ $ linkinator LOCATIONS [ --arguments ] --timeout Request timeout in ms. Defaults to 0 (no timeout). + --url-rewrite-search + Pattern to search for in urls. Must be used with --url-rewrite-replace. + + --url-rewrite-replace + Expression used to replace search content. Must be used with --url-rewrite-search. + --verbosity Override the default verbosity for this command. Available options are 'debug', 'info', 'warning', 'error', and 'none'. Defaults to 'warning'. @@ -200,6 +206,7 @@ where the server is started. Defaults to the path passed in `path`. - `markdown` (boolean) - Automatically parse and scan markdown if scanning from a location on disk. - `linksToSkip` (array | function) - An array of regular expression strings that should be skipped, OR an async function that's called for each link with the link URL as its only argument. Return a Promise that resolves to `true` to skip the link or `false` to check it. - `directoryListing` (boolean) - Automatically serve a static file listing page when serving a directory. Defaults to `false`. +- `urlRewriteExpressions` (array) - An array of objects, each containing a `pattern` (RegExp) to search for in each URL and a `replacement` (string) to substitute for it. ### linkinator.LinkChecker() diff --git a/src/cli.ts b/src/cli.ts index 6863a015..7e15e2bf 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -71,6 +71,12 @@ const cli = meow( --timeout Request timeout in ms. Defaults to 0 (no timeout). + --url-rewrite-search + Pattern to search for in urls. Must be used with --url-rewrite-replace. + + --url-rewrite-replace + Expression used to replace search content. Must be used with --url-rewrite-search. + --verbosity Override the default verbosity for this command. Available options are 'debug', 'info', 'warning', 'error', and 'none'. Defaults to 'warning'. 
@@ -96,6 +102,8 @@ const cli = meow( verbosity: {type: 'string'}, directoryListing: {type: 'boolean'}, retry: {type: 'boolean'}, + urlRewriteSearch: {type: 'string'}, + urlRewriteReplace: {type: 'string'}, }, booleanDefault: undefined, } @@ -109,6 +117,14 @@ async function main() { return; } flags = await getConfig(cli.flags); + if ( + (flags.urlRewriteReplace && !flags.urlRewriteSearch) || + (flags.urlRewriteSearch && !flags.urlRewriteReplace) + ) { + throw new Error( + 'The url-rewrite-replace flag must be used with the url-rewrite-search flag.' + ); + } const start = Date.now(); const verbosity = parseVerbosity(flags); @@ -155,6 +171,14 @@ async function main() { opts.linksToSkip = flags.skip; } } + if (flags.urlRewriteSearch && flags.urlRewriteReplace) { + opts.urlRewriteExpressions = [ + { + pattern: new RegExp(flags.urlRewriteSearch), + replacement: flags.urlRewriteReplace, + }, + ]; + } const result = await checker.check(opts); const filteredResults = result.links.filter(link => { switch (link.state) { diff --git a/src/config.ts b/src/config.ts index 65509b1d..60a8f619 100644 --- a/src/config.ts +++ b/src/config.ts @@ -16,6 +16,8 @@ export interface Flags { serverRoot?: string; directoryListing?: boolean; retry?: boolean; + urlRewriteSearch?: string; + urlRewriteReplace?: string; } export async function getConfig(flags: Flags) { diff --git a/src/index.ts b/src/index.ts index 14f03b8e..02a0b287 100644 --- a/src/index.ts +++ b/src/index.ts @@ -144,6 +144,16 @@ export class LinkChecker extends EventEmitter { * @returns A list of crawl results consisting of urls and status codes */ async crawl(opts: CrawlOptions): Promise { + // apply any regex url replacements + if (opts.checkOptions.urlRewriteExpressions) { + for (const exp of opts.checkOptions.urlRewriteExpressions) { + const newUrl = opts.url.href.replace(exp.pattern, exp.replacement); + if (opts.url.href !== newUrl) { + opts.url.href = newUrl; + } + } + } + // explicitly skip non-http[s] links before making 
the request const proto = opts.url.protocol; if (proto !== 'http:' && proto !== 'https:') { diff --git a/src/options.ts b/src/options.ts index f69fa346..00c7f880 100644 --- a/src/options.ts +++ b/src/options.ts @@ -6,6 +6,11 @@ import * as globby from 'glob'; const stat = util.promisify(fs.stat); const glob = util.promisify(globby); +export interface UrlRewriteExpression { + pattern: RegExp; + replacement: string; +} + export interface CheckOptions { concurrency?: number; port?: number; @@ -17,6 +22,7 @@ export interface CheckOptions { serverRoot?: string; directoryListing?: boolean; retry?: boolean; + urlRewriteExpressions?: UrlRewriteExpression[]; } export interface InternalCheckOptions extends CheckOptions { diff --git a/test/fixtures/rewrite/LICENSE.md b/test/fixtures/rewrite/LICENSE.md new file mode 100644 index 00000000..3336d558 --- /dev/null +++ b/test/fixtures/rewrite/LICENSE.md @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) Justin Beckwith (jbeckwith.com) + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/test/fixtures/rewrite/README.md b/test/fixtures/rewrite/README.md new file mode 100644 index 00000000..1765d51e --- /dev/null +++ b/test/fixtures/rewrite/README.md @@ -0,0 +1,2 @@ +# Say hello to my README +This has [a link](NOTLICENSE.md) to something. diff --git a/test/test.cli.ts b/test/test.cli.ts index 7bf6e8b2..4e055144 100644 --- a/test/test.cli.ts +++ b/test/test.cli.ts @@ -202,6 +202,42 @@ describe('cli', function () { assert.strictEqual(res.exitCode, 0); }); + it('should fail if a url search is provided without a replacement', async () => { + const res = await execa( + node, + [linkinator, '--url-rewrite-search', 'boop', 'test/fixtures/basic'], + { + reject: false, + } + ); + assert.strictEqual(res.exitCode, 1); + assert.match(res.stderr, /flag must be used/); + }); + + it('should fail if a url replacement is provided without a search', async () => { + const res = await execa( + node, + [linkinator, '--url-rewrite-replace', 'beep', 'test/fixtures/basic'], + { + reject: false, + } + ); + assert.strictEqual(res.exitCode, 1); + assert.match(res.stderr, /flag must be used/); + }); + + it('should respect url rewrites', async () => { + const res = await execa(node, [ + linkinator, + '--url-rewrite-search', + 'NOTLICENSE.md', + '--url-rewrite-replace', + 'LICENSE.md', + 'test/fixtures/rewrite/README.md', + ]); + assert.match(res.stderr, /Successfully scanned/); + }); + it('should warn on retries', async () => { // start a web server to return the 429 let requestCount = 0; diff --git a/test/test.index.ts b/test/test.index.ts index 914dfd27..f4c62518 100644 --- a/test/test.index.ts +++ b/test/test.index.ts @@ -524,4 +524,17 @@ describe('linkinator', () => { assert.strictEqual(fakeLink.url, 
'http://fake.local/'); scope.done(); }); + + it('should rewrite urls', async () => { + const results = await check({ + path: 'test/fixtures/rewrite/README.md', + urlRewriteExpressions: [ + { + pattern: /NOTLICENSE\.[a-z]+/, + replacement: 'LICENSE.md', + }, + ], + }); + assert.ok(results.passed); + }); });