diff --git a/src/ldp/http/BasicTargetExtractor.ts b/src/ldp/http/BasicTargetExtractor.ts
index 7138167ba..738db89d0 100644
--- a/src/ldp/http/BasicTargetExtractor.ts
+++ b/src/ldp/http/BasicTargetExtractor.ts
@@ -1,6 +1,7 @@
 import type { TLSSocket } from 'tls';
 import { format } from 'url';
 import type { HttpRequest } from '../../server/HttpRequest';
+import { toCanonicalUrl } from '../../util/Util';
 import type { ResourceIdentifier } from '../representation/ResourceIdentifier';
 import { TargetExtractor } from './TargetExtractor';
 
@@ -28,6 +29,6 @@ export class BasicTargetExtractor extends TargetExtractor {
       pathname: input.url,
     });
 
-    return { path: url };
+    return { path: toCanonicalUrl(url) };
   }
 }
diff --git a/src/util/Util.ts b/src/util/Util.ts
index 080f08178..5a75aeaea 100644
--- a/src/util/Util.ts
+++ b/src/util/Util.ts
@@ -66,3 +66,16 @@ export const pipeStreamsAndErrors = (readable: Readable, des
   readable.on('error', (error): boolean => destination.emit('error', new UnsupportedHttpError(error.message)));
   return destination;
 };
+
+/**
+ * Converts a URL string to the "canonical" version that should be used internally for consistency.
+ * Decodes all percent encodings and then makes sure only the necessary characters are encoded again.
+ */
+export const toCanonicalUrl = (url: string): string => {
+  const match = /(\w+:\/\/[^/]+\/)(.*)/u.exec(url);
+  if (!match) {
+    throw new UnsupportedHttpError(`Invalid URL ${url}`);
+  }
+  const [ , domain, path ] = match;
+  return encodeURI(domain + path.split('/').map(decodeURIComponent).join('/'));
+};
diff --git a/test/unit/ldp/http/BasicTargetExtractor.test.ts b/test/unit/ldp/http/BasicTargetExtractor.test.ts
index 8d2f3d5fc..db79a4bfc 100644
--- a/test/unit/ldp/http/BasicTargetExtractor.test.ts
+++ b/test/unit/ldp/http/BasicTargetExtractor.test.ts
@@ -16,7 +16,8 @@ describe('A BasicTargetExtractor', (): void => {
   });
 
   it('returns the input URL.', async(): Promise<void> => {
-    await expect(extractor.handle({ url: 'url', headers: { host: 'test.com' }} as any)).resolves.toEqual({ path: 'http://test.com/url' });
+    await expect(extractor.handle({ url: 'url', headers: { host: 'test.com' }} as any))
+      .resolves.toEqual({ path: 'http://test.com/url' });
   });
 
   it('uses https protocol if the connection is secure.', async(): Promise<void> => {
@@ -24,4 +25,9 @@ describe('A BasicTargetExtractor', (): void => {
       { url: 'url', headers: { host: 'test.com' }, connection: { encrypted: true } as any } as any,
     )).resolves.toEqual({ path: 'https://test.com/url' });
   });
+
+  it('decodes relevant percent encodings.', async(): Promise<void> => {
+    await expect(extractor.handle({ url: '/a%20path%26/name', headers: { host: 'test.com' }} as any))
+      .resolves.toEqual({ path: 'http://test.com/a%20path&/name' });
+  });
 });
diff --git a/test/unit/util/Util.test.ts b/test/unit/util/Util.test.ts
index 284a53e31..c196b589f 100644
--- a/test/unit/util/Util.test.ts
+++ b/test/unit/util/Util.test.ts
@@ -1,5 +1,6 @@
 import streamifyArray from 'streamify-array';
-import { ensureTrailingSlash, matchingMediaType, readableToString } from '../../../src/util/Util';
+import { UnsupportedHttpError } from '../../../src/util/errors/UnsupportedHttpError';
+import { ensureTrailingSlash, matchingMediaType, readableToString, toCanonicalUrl } from '../../../src/util/Util';
 
 describe('Util function', (): void => {
   describe('ensureTrailingSlash', (): void => {
@@ -31,4 +32,15 @@ describe('Util function', (): void => {
       expect(matchingMediaType('text/plain', 'text/turtle')).toBeFalsy();
     });
   });
+
+  describe('toCanonicalUrl', (): void => {
+    it('makes sure only the necessary parts are encoded.', async(): Promise<void> => {
+      expect(toCanonicalUrl('http://test.com/a%20path%26/name'))
+        .toEqual('http://test.com/a%20path&/name');
+    });
+
+    it('errors on invalid URLs.', async(): Promise<void> => {
+      expect((): any => toCanonicalUrl('notAnUrl')).toThrow(new UnsupportedHttpError('Invalid URL notAnUrl'));
+    });
+  });
 });