feat: Decode URI in target extractor

This commit is contained in:
Joachim Van Herwegen 2020-10-06 10:46:18 +02:00
parent b47dc3f7f6
commit bb28af937b
4 changed files with 35 additions and 3 deletions

View File

@ -1,6 +1,7 @@
import type { TLSSocket } from 'tls';
import { format } from 'url';
import type { HttpRequest } from '../../server/HttpRequest';
import { toCanonicalUrl } from '../../util/Util';
import type { ResourceIdentifier } from '../representation/ResourceIdentifier';
import { TargetExtractor } from './TargetExtractor';
@ -28,6 +29,6 @@ export class BasicTargetExtractor extends TargetExtractor {
pathname: input.url,
});
return { path: url };
return { path: toCanonicalUrl(url) };
}
}

View File

@ -66,3 +66,16 @@ export const pipeStreamsAndErrors = <T extends Writable>(readable: Readable, des
readable.on('error', (error): boolean => destination.emit('error', new UnsupportedHttpError(error.message)));
return destination;
};
/**
 * Converts a URL string to the "canonical" version that should be used internally for consistency.
 * Decodes all percent encodings and then makes sure only the necessary characters are encoded again.
 *
 * @param url - URL containing a `scheme://authority/` prefix followed by the path.
 *
 * @returns The prefix joined with the percent-decoded path segments, passed through `encodeURI`.
 *
 * @throws UnsupportedHttpError
 * If the input does not contain the expected prefix, or if it can not be decoded/encoded
 * (e.g. a truncated or otherwise malformed percent encoding such as `%E0%A4%A`).
 */
export const toCanonicalUrl = (url: string): string => {
  const match = /(\w+:\/\/[^/]+\/)(.*)/u.exec(url);
  if (!match) {
    throw new UnsupportedHttpError(`Invalid URL ${url}`);
  }
  const [ , domain, path ] = match;
  try {
    // Decode per segment so the existing `/` separators are kept as they are.
    // NOTE(review): `encodeURI` leaves reserved characters untouched, so an encoded `%2F`, `%3F` or `%23`
    // ends up as a literal `/`, `?` or `#` in the result — confirm this is intended.
    const decodedPath = path.split('/').map((segment): string => decodeURIComponent(segment)).join('/');
    return encodeURI(domain + decodedPath);
  } catch {
    // `decodeURIComponent` and `encodeURI` throw a URIError on malformed input;
    // report that as a bad request, consistent with the check above, instead of leaking the raw error.
    throw new UnsupportedHttpError(`Invalid URL ${url}`);
  }
};

View File

@ -16,7 +16,8 @@ describe('A BasicTargetExtractor', (): void => {
});
it('returns the input URL.', async(): Promise<void> => {
await expect(extractor.handle({ url: 'url', headers: { host: 'test.com' }} as any)).resolves.toEqual({ path: 'http://test.com/url' });
await expect(extractor.handle({ url: 'url', headers: { host: 'test.com' }} as any))
.resolves.toEqual({ path: 'http://test.com/url' });
});
it('uses https protocol if the connection is secure.', async(): Promise<void> => {
@ -24,4 +25,9 @@ describe('A BasicTargetExtractor', (): void => {
{ url: 'url', headers: { host: 'test.com' }, connection: { encrypted: true } as any } as any,
)).resolves.toEqual({ path: 'https://test.com/url' });
});
it('decodes relevant percent encodings.', async(): Promise<void> => {
  // The request target carries an encoded space (%20) and an encoded ampersand (%26).
  const request = { url: '/a%20path%26/name', headers: { host: 'test.com' }} as any;
  const target = extractor.handle(request);
  // The expected path keeps the space encoded and contains a literal ampersand.
  await expect(target).resolves.toEqual({ path: 'http://test.com/a%20path&/name' });
});
});

View File

@ -1,5 +1,6 @@
import streamifyArray from 'streamify-array';
import { ensureTrailingSlash, matchingMediaType, readableToString } from '../../../src/util/Util';
import { UnsupportedHttpError } from '../../../src/util/errors/UnsupportedHttpError';
import { ensureTrailingSlash, matchingMediaType, readableToString, toCanonicalUrl } from '../../../src/util/Util';
describe('Util function', (): void => {
describe('ensureTrailingSlash', (): void => {
@ -31,4 +32,15 @@ describe('Util function', (): void => {
expect(matchingMediaType('text/plain', 'text/turtle')).toBeFalsy();
});
});
// Unit tests for the toCanonicalUrl helper.
describe('toCanonicalUrl', (): void => {
  it('makes sure only the necessary parts are encoded.', async(): Promise<void> => {
    // %20 is expected to stay encoded while %26 is expected to become a literal ampersand.
    const canonical = toCanonicalUrl('http://test.com/a%20path%26/name');
    expect(canonical).toEqual('http://test.com/a%20path&/name');
  });

  it('errors on invalid URLs.', async(): Promise<void> => {
    const expectedError = new UnsupportedHttpError('Invalid URL notAnUrl');
    const convert = (): any => toCanonicalUrl('notAnUrl');
    expect(convert).toThrow(expectedError);
  });
});
});