Skip to content

Commit

Permalink
feat: add filter function as an option to linkinator.check() (#120)
Browse files Browse the repository at this point in the history
  • Loading branch information
zeke authored and JustinBeckwith committed Nov 20, 2019
1 parent 873dac6 commit 8240159
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 10 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ Asynchronous method that runs a site wide scan. Options come in the form of an o
- `concurrency` (number) - The number of connections to make simultaneously. Defaults to 100.
- `port` (number) - When the `path` is provided as a local path on disk, the `port` on which to start the temporary web server. Defaults to a random port in the high range.
- `recurse` (boolean) - By default, all scans are shallow. Only the top level links on the requested page will be scanned. By setting `recurse` to `true`, the crawler will follow all links on the page, and continue scanning links **on the same domain** for as long as it can go. Results are cached, so no worries about loops.
- `linksToSkip` (array) - An array of regular expression strings that should be skipped during the scan.
- `linksToSkip` (array | function) - An array of regular expression strings that should be skipped, OR an async function that's called for each link with the link URL as its only argument. Return a Promise that resolves to `true` to skip the link or `false` to check it.

#### linkinator.LinkChecker()
Constructor method that can be used to create a new `LinkChecker` instance. This is particularly useful if you want to receive events as the crawler crawls. Exposes the following events:
Expand Down
35 changes: 26 additions & 9 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ export interface CheckOptions {
port?: number;
path: string;
recurse?: boolean;
linksToSkip?: string[];
linksToSkip?: string[] | ((link: string) => Promise<boolean>);
}

export enum LinkState {
Expand Down Expand Up @@ -137,14 +137,11 @@ export class LinkChecker extends EventEmitter {
return;
}

// Check for user configured links that should be skipped
const skips = opts.checkOptions
.linksToSkip!.map(linkToSkip => {
return new RegExp(linkToSkip).test(opts.url.href);
})
.filter(match => !!match);

if (skips.length > 0) {
// Check for a user-configured function to filter out links
if (
typeof opts.checkOptions.linksToSkip === 'function' &&
(await opts.checkOptions.linksToSkip(opts.url.href))
) {
const result: LinkResult = {
url: opts.url.href,
state: LinkState.SKIPPED,
Expand All @@ -155,6 +152,26 @@ export class LinkChecker extends EventEmitter {
return;
}

// Check for a user-configured array of link regular expressions that should be skipped
if (Array.isArray(opts.checkOptions.linksToSkip)) {
const skips = opts.checkOptions.linksToSkip
.map(linkToSkip => {
return new RegExp(linkToSkip).test(opts.url.href);
})
.filter(match => !!match);

if (skips.length > 0) {
const result: LinkResult = {
url: opts.url.href,
state: LinkState.SKIPPED,
parent: opts.parent,
};
opts.results.push(result);
this.emit('link', result);
return;
}
}

// Perform a HEAD or GET request based on the need to crawl
let status = 0;
let state = LinkState.BROKEN;
Expand Down
7 changes: 7 additions & 0 deletions test/fixtures/filter/index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<!-- Fixture for the linksToSkip filter test: one link that should be checked and two whose URLs contain "filterme" and should be skipped. -->
<html>
<body>
<a href="https://good.com">I'm good</a>
<a href="http://www.filterme.com">I should be filtered</a>
<a href="https://example.com/filtermetoo">I should also be filtered</a>
</body>
</html>
16 changes: 16 additions & 0 deletions test/test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,22 @@ describe('linkinator', () => {
);
});

// A `linksToSkip` callback should mark every link it resolves `true` for as
// SKIPPED, while the remaining links are still checked normally.
it('should skip links if passed a linksToSkip function', async () => {
  // Mock the HEAD request for the single fixture link that is not filtered.
  const goodHost = nock('https://good.com').head('/').reply(200);

  // Skip any link whose URL contains "filterme".
  const skipFilterMe = (link: string) =>
    Promise.resolve(link.includes('filterme'));

  const results = await check({
    path: 'test/fixtures/filter',
    linksToSkip: skipFilterMe,
  });

  assert.ok(results.passed);

  // The fixture page holds two links containing "filterme".
  const skippedLinks = results.links.filter(
    x => x.state === LinkState.SKIPPED
  );
  assert.strictEqual(skippedLinks.length, 2);

  // Fails if the mocked good.com request was never made.
  goodHost.done();
});

it('should report broken links', async () => {
const scope = nock('http://fake.local')
.head('/')
Expand Down

0 comments on commit 8240159

Please sign in to comment.