/**
 * Parses `input` (optionally against `base`) with the WHATWG `URL` parser.
 *
 * @param input URL string to parse; may be relative when `base` is given.
 * @param base Optional base URL used to resolve a relative `input`.
 * @returns The parsed URL, or `undefined` when the parser rejects the input
 *     (or the base) instead of letting the `TypeError` propagate.
 */
function tryParseUrl(input: string, base?: string): URL | undefined {
  try {
    return new URL(input, base);
  } catch {
    return undefined;
  }
}

/**
 * Computes the base URL that relative links in a document should be resolved
 * against, given the `href` of the document's `<base>` element.
 *
 * - An absolute `href` (one that starts with a URI scheme) is returned
 *   verbatim, provided it parses as a URL.
 * - Any other `href` is resolved against `oldBaseUrl`; the fragment is
 *   removed from the result and the serialized URL is returned.
 * - If the `href` cannot be parsed at all, `oldBaseUrl` is returned unchanged
 *   so that a malformed `<base>` tag degrades to "no `<base>` tag" rather
 *   than throwing or making every link in the page unresolvable.
 *
 * @param htmlBaseUrl Raw value of the `<base href>` attribute.
 * @param oldBaseUrl URL the document itself was loaded from.
 * @returns The base URL to use when resolving the document's links.
 */
function getBaseUrl(htmlBaseUrl: string, oldBaseUrl: string): string {
  if (isAbsoluteUrl(htmlBaseUrl)) {
    // Parsed only to validate: the original string is what callers get back.
    return tryParseUrl(htmlBaseUrl) ? htmlBaseUrl : oldBaseUrl;
  }

  const resolved = tryParseUrl(htmlBaseUrl, oldBaseUrl);
  if (!resolved) {
    return oldBaseUrl;
  }

  // A fragment on the base plays no part in resolving relative references.
  resolved.hash = '';
  return resolved.href;
}

/**
 * Tells whether `url` starts with a URI scheme and is therefore absolute.
 *
 * @param url String to inspect; it is not parsed or validated.
 * @returns `true` when `url` begins with `<scheme>:`, `false` otherwise,
 *     including for Windows drive paths such as `C:\dir`.
 */
function isAbsoluteUrl(url: string): boolean {
  // Don't match Windows paths: a drive letter followed by `:\` would
  // otherwise look like a one-letter scheme.
  if (/^[a-zA-Z]:\\/.test(url)) {
    return false;
  }

  // Scheme: https://tools.ietf.org/html/rfc3986#section-3.1
  // Absolute URL: https://tools.ietf.org/html/rfc3986#section-4.3
  return /^[a-zA-Z][a-zA-Z\d+\-.]*:/.test(url);
}
string): string[] { switch (name) { case 'srcset': diff --git a/test/fixtures/basetag/absolute.html b/test/fixtures/basetag/absolute.html new file mode 100644 index 00000000..a3567a65 --- /dev/null +++ b/test/fixtures/basetag/absolute.html @@ -0,0 +1,12 @@ + + + + + + + relative link + relative link + relative link + relative link + + diff --git a/test/fixtures/basetag/empty-base.html b/test/fixtures/basetag/empty-base.html new file mode 100644 index 00000000..91bb5e80 --- /dev/null +++ b/test/fixtures/basetag/empty-base.html @@ -0,0 +1,11 @@ + + + + + + relative link + relative link + relative link + relative link + + diff --git a/test/fixtures/basetag/relative-dot-folder.html b/test/fixtures/basetag/relative-dot-folder.html new file mode 100644 index 00000000..284c63b0 --- /dev/null +++ b/test/fixtures/basetag/relative-dot-folder.html @@ -0,0 +1,11 @@ + + + + + + relative link + relative link + relative link + relative link + + diff --git a/test/fixtures/basetag/relative-folder.html b/test/fixtures/basetag/relative-folder.html new file mode 100644 index 00000000..fdb40f13 --- /dev/null +++ b/test/fixtures/basetag/relative-folder.html @@ -0,0 +1,11 @@ + + + + + + relative link + relative link + relative link + relative link + + diff --git a/test/fixtures/basetag/relative-page.html b/test/fixtures/basetag/relative-page.html new file mode 100644 index 00000000..a46c122e --- /dev/null +++ b/test/fixtures/basetag/relative-page.html @@ -0,0 +1,11 @@ + + + + + + relative link + relative link + relative link + relative link + + diff --git a/test/fixtures/basetag/relative-to-root.html b/test/fixtures/basetag/relative-to-root.html new file mode 100644 index 00000000..341577eb --- /dev/null +++ b/test/fixtures/basetag/relative-to-root.html @@ -0,0 +1,11 @@ + + + + + + relative link + relative link + relative link + relative link + + diff --git a/test/test.ts b/test/test.ts index 783b8aef..cc64361c 100644 --- a/test/test.ts +++ b/test/test.ts @@ -88,6 +88,77 @@ 
// Checks that links are resolved against a relative `<base href>`: every
// fixture is served as http://fake.local/pageBase/index and only the URL
// listed as `nonBrokenUrl` is mocked to answer the HEAD request with 200.
it('should detect relative urls with relative base', async () => {
  const cases = [
    {
      fixture: 'test/fixtures/basetag/relative-to-root.html',
      nonBrokenUrl: '/anotherBase/ok',
    },
    {
      fixture: 'test/fixtures/basetag/relative-folder.html',
      nonBrokenUrl: '/pageBase/anotherBase/ok',
    },
    {
      fixture: 'test/fixtures/basetag/relative-dot-folder.html',
      nonBrokenUrl: '/pageBase/anotherBase/ok',
    },
    {
      fixture: 'test/fixtures/basetag/relative-page.html',
      nonBrokenUrl: '/pageBase/ok',
    },
    {
      fixture: 'test/fixtures/basetag/empty-base.html',
      nonBrokenUrl: '/pageBase/ok',
    },
  ];

  // Cases run one after another: each one registers its own mock for the
  // same page URL and is awaited before the next mock is registered.
  for (const { fixture, nonBrokenUrl } of cases) {
    const scope = nock('http://fake.local')
      .get('/pageBase/index')
      .replyWithFile(200, fixture, {
        'Content-Type': 'text/html; charset=UTF-8',
      })
      .head(nonBrokenUrl)
      .reply(200);

    const results = await check({
      path: 'http://fake.local/pageBase/index',
    });

    // Three links are reported in total and exactly one of them is broken.
    assert.strictEqual(results.links.length, 3);
    assert.strictEqual(
      results.links.filter(x => x.state === LinkState.BROKEN).length,
      1
    );
    scope.done();
  }
});

// Checks that links are resolved against an absolute `<base href>`: the page
// is served from fake.local while the only mocked successful HEAD target
// lives on another.fake.local.
it('should detect relative urls with absolute base', async () => {
  const scope = nock('http://fake.local')
    .get('/pageBase/index')
    .replyWithFile(200, 'test/fixtures/basetag/absolute.html', {
      'Content-Type': 'text/html; charset=UTF-8',
    });

  const anotherScope = nock('http://another.fake.local')
    .head('/ok')
    .reply(200);

  const results = await check({
    path: 'http://fake.local/pageBase/index',
  });

  // Three links are reported in total and exactly one of them is broken.
  assert.strictEqual(results.links.length, 3);
  assert.strictEqual(
    results.links.filter(x => x.state === LinkState.BROKEN).length,
    1
  );
  scope.done();
  anotherScope.done();
});