Skip to content

Commit

Permalink
fix: handle base correctly (#114)
Browse files Browse the repository at this point in the history
  • Loading branch information
marapper authored and JustinBeckwith committed Nov 19, 2019
1 parent d653f2c commit 9813a61
Show file tree
Hide file tree
Showing 8 changed files with 170 additions and 1 deletion.
33 changes: 32 additions & 1 deletion src/links.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,43 @@ export function getLinks(source: string, baseUrl: string): ParsedUrl[] {
links.push(...values);
});
});

let realBaseUrl = baseUrl;
const base = $('base[href]');
if (base.length) {
// only first <base by specification
const htmlBaseUrl = base.first().attr('href');

realBaseUrl = getBaseUrl(htmlBaseUrl, baseUrl);
}

const sanitized = links
.filter(link => !!link)
.map(link => parseLink(link, baseUrl));
.map(link => parseLink(link, realBaseUrl));
return sanitized;
}

/**
 * Work out the base URL that a page's links should be resolved against,
 * given the `href` value of its `<base>` element.
 *
 * @param htmlBaseUrl The raw `href` value taken from the `<base>` element.
 * @param oldBaseUrl The base URL that was in effect before the `<base>`
 *   element was considered; relative `href` values are resolved against it.
 * @returns `htmlBaseUrl` unchanged when `isAbsoluteUrl` reports it as
 *   absolute; otherwise the `href` resolved against `oldBaseUrl`, with any
 *   fragment removed.
 */
function getBaseUrl(htmlBaseUrl: string, oldBaseUrl: string): string {
  if (!isAbsoluteUrl(htmlBaseUrl)) {
    // Relative href: resolve it against the previous base and clear the
    // fragment before serializing.
    const resolved = new URL(htmlBaseUrl, oldBaseUrl);
    resolved.hash = '';
    return resolved.href;
  }

  // Absolute href: handed back exactly as written in the document.
  return htmlBaseUrl;
}

/**
 * Report whether a string begins with a URL scheme.
 *
 * A string counts as absolute when it starts with an RFC 3986 scheme
 * (a letter followed by letters, digits, `+`, `-` or `.`) and a colon.
 * Strings that start like a Windows drive path (one letter, a colon and a
 * backslash, e.g. `C:\dir`) are rejected even though the drive letter would
 * otherwise match the scheme pattern.
 *
 * Scheme: https://tools.ietf.org/html/rfc3986#section-3.1
 * Absolute URL: https://tools.ietf.org/html/rfc3986#section-4.3
 *
 * @param url The string to inspect.
 * @returns `true` if the string starts with a scheme and is not a Windows
 *   drive path, `false` otherwise.
 */
function isAbsoluteUrl(url: string): boolean {
  const windowsDrivePath = /^[a-zA-Z]:\\/;
  const leadingScheme = /^[a-zA-Z][a-zA-Z\d+\-.]*:/;

  return !windowsDrivePath.test(url) && leadingScheme.test(url);
}

function parseAttr(name: string, value: string): string[] {
switch (name) {
case 'srcset':
Expand Down
12 changes: 12 additions & 0 deletions test/fixtures/basetag/absolute.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
<html>
<head>
<base href="http://another.fake.local/" />
<base href="./unused" />
</head>
<body>
<a href="ok">relative link</a>
<a href="broken">relative link</a>
<a href="./ok">relative link</a>
<a href="./broken">relative link</a>
</body>
</html>
11 changes: 11 additions & 0 deletions test/fixtures/basetag/empty-base.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<html>
<head>
<base href="" />
</head>
<body>
<a href="ok">relative link</a>
<a href="broken">relative link</a>
<a href="./ok">relative link</a>
<a href="./broken">relative link</a>
</body>
</html>
11 changes: 11 additions & 0 deletions test/fixtures/basetag/relative-dot-folder.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<html>
<head>
<base href="./anotherBase/" />
</head>
<body>
<a href="ok">relative link</a>
<a href="broken">relative link</a>
<a href="./ok">relative link</a>
<a href="./broken">relative link</a>
</body>
</html>
11 changes: 11 additions & 0 deletions test/fixtures/basetag/relative-folder.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<html>
<head>
<base href="anotherBase/" />
</head>
<body>
<a href="ok">relative link</a>
<a href="broken">relative link</a>
<a href="./ok">relative link</a>
<a href="./broken">relative link</a>
</body>
</html>
11 changes: 11 additions & 0 deletions test/fixtures/basetag/relative-page.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<html>
<head>
<base href="anotherBase" />
</head>
<body>
<a href="ok">relative link</a>
<a href="broken">relative link</a>
<a href="./ok">relative link</a>
<a href="./broken">relative link</a>
</body>
</html>
11 changes: 11 additions & 0 deletions test/fixtures/basetag/relative-to-root.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<html>
<head>
<base href="/anotherBase/" />
</head>
<body>
<a href="ok">relative link</a>
<a href="broken">relative link</a>
<a href="./ok">relative link</a>
<a href="./broken">relative link</a>
</body>
</html>
71 changes: 71 additions & 0 deletions test/test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,77 @@ describe('linkinator', () => {
);
});

it('should detect relative urls with relative base', async () => {
  // Each fixture declares a different non-absolute <base href>. For every
  // fixture, `nonBrokenUrl` is the single HEAD path the mock server answers
  // with a 200.
  const cases = [
    {
      fixture: 'test/fixtures/basetag/relative-to-root.html',
      nonBrokenUrl: '/anotherBase/ok',
    },
    {
      fixture: 'test/fixtures/basetag/relative-folder.html',
      nonBrokenUrl: '/pageBase/anotherBase/ok',
    },
    {
      fixture: 'test/fixtures/basetag/relative-dot-folder.html',
      nonBrokenUrl: '/pageBase/anotherBase/ok',
    },
    {
      fixture: 'test/fixtures/basetag/relative-page.html',
      nonBrokenUrl: '/pageBase/ok',
    },
    {
      fixture: 'test/fixtures/basetag/empty-base.html',
      nonBrokenUrl: '/pageBase/ok',
    },
  ];

  // Run the cases one after another; each iteration installs its own mock.
  for (const { fixture, nonBrokenUrl } of cases) {
    // Serve the fixture as the page under test and accept one HEAD request
    // for the expected resolved link.
    const scope = nock('http://fake.local')
      .get('/pageBase/index')
      .replyWithFile(200, fixture, {
        'Content-Type': 'text/html; charset=UTF-8',
      })
      .head(nonBrokenUrl)
      .reply(200);

    const results = await check({
      path: 'http://fake.local/pageBase/index',
    });

    assert.strictEqual(results.links.length, 3);
    const brokenLinks = results.links.filter(
      x => x.state === LinkState.BROKEN
    );
    assert.strictEqual(brokenLinks.length, 1);

    // Fails if any of the mocked requests was never made.
    scope.done();
  }
});

it('should detect relative urls with absolute base', async () => {
  // The page itself is served from fake.local ...
  const pageScope = nock('http://fake.local')
    .get('/pageBase/index')
    .replyWithFile(200, 'test/fixtures/basetag/absolute.html', {
      'Content-Type': 'text/html; charset=UTF-8',
    });

  // ... while the only link answered with a 200 lives on the host named by
  // the fixture's first <base href>.
  const baseHostScope = nock('http://another.fake.local')
    .head('/ok')
    .reply(200);

  const results = await check({
    path: 'http://fake.local/pageBase/index',
  });

  assert.strictEqual(results.links.length, 3);
  const brokenLinks = results.links.filter(
    x => x.state === LinkState.BROKEN
  );
  assert.strictEqual(brokenLinks.length, 1);

  // Both mocks must have been fully consumed.
  pageScope.done();
  baseHostScope.done();
});

it('should detect broken image links', async () => {
const results = await check({ path: 'test/fixtures/image' });
assert.strictEqual(
Expand Down

0 comments on commit 9813a61

Please sign in to comment.