From e85ca622da0c8e3ef8344332e162cfe327f74551 Mon Sep 17 00:00:00 2001 From: Joachim Van Herwegen Date: Tue, 13 Oct 2020 17:07:35 +0200 Subject: [PATCH] fix: Make sure all URI characters are correctly encoded --- src/ldp/http/BasicTargetExtractor.ts | 10 ++++---- src/storage/ExtensionBasedMapper.ts | 8 +++--- src/util/Util.ts | 25 +++++++++++-------- .../ldp/http/BasicTargetExtractor.test.ts | 10 ++++++-- test/unit/util/Util.test.ts | 25 +++++++++++++------ 5 files changed, 49 insertions(+), 29 deletions(-) diff --git a/src/ldp/http/BasicTargetExtractor.ts b/src/ldp/http/BasicTargetExtractor.ts index 738db89d0..8892c8ab4 100644 --- a/src/ldp/http/BasicTargetExtractor.ts +++ b/src/ldp/http/BasicTargetExtractor.ts @@ -1,7 +1,7 @@ import type { TLSSocket } from 'tls'; import { format } from 'url'; import type { HttpRequest } from '../../server/HttpRequest'; -import { toCanonicalUrl } from '../../util/Util'; +import { toCanonicalUriPath } from '../../util/Util'; import type { ResourceIdentifier } from '../representation/ResourceIdentifier'; import { TargetExtractor } from './TargetExtractor'; @@ -23,12 +23,12 @@ export class BasicTargetExtractor extends TargetExtractor { throw new Error('Missing host.'); } const isHttps = input.connection && (input.connection as TLSSocket).encrypted; - const url = format({ + const path = format({ protocol: `http${isHttps ? 's' : ''}`, - host: input.headers.host, - pathname: input.url, + host: toCanonicalUriPath(input.headers.host), + pathname: toCanonicalUriPath(input.url), }); - return { path: toCanonicalUrl(url) }; + return { path }; } } diff --git a/src/storage/ExtensionBasedMapper.ts b/src/storage/ExtensionBasedMapper.ts index 668032b46..07fcaa7a0 100644 --- a/src/storage/ExtensionBasedMapper.ts +++ b/src/storage/ExtensionBasedMapper.ts @@ -6,7 +6,7 @@ import { APPLICATION_OCTET_STREAM, TEXT_TURTLE } from '../util/ContentTypes'; import { ConflictHttpError } from '../util/errors/ConflictHttpError'; import { NotFoundHttpError } from '../util/errors/NotFoundHttpError'; import { UnsupportedHttpError } from '../util/errors/UnsupportedHttpError'; -import { trimTrailingSlashes } from '../util/Util'; +import { decodeUriPathComponents, encodeUriPathComponents, trimTrailingSlashes } from '../util/Util'; import type { FileIdentifierMapper, ResourceLink } from './FileIdentifierMapper'; const { join: joinPath, normalize: normalizePath } = posix; @@ -137,7 +137,7 @@ export class ExtensionBasedMapper implements FileIdentifierMapper { let relative = filePath.slice(this.rootFilepath.length); if (isContainer) { return { - identifier: { path: encodeURI(this.baseRequestURI + relative) }, + identifier: { path: this.baseRequestURI + encodeUriPathComponents(relative) }, filePath, }; } @@ -150,7 +150,7 @@ export class ExtensionBasedMapper implements FileIdentifierMapper { } return { - identifier: { path: encodeURI(this.baseRequestURI + relative) }, + identifier: { path: this.baseRequestURI + encodeUriPathComponents(relative) }, filePath, contentType, }; @@ -201,7 +201,7 @@ export class ExtensionBasedMapper implements FileIdentifierMapper { if (!identifier.path.startsWith(this.baseRequestURI)) { throw new NotFoundHttpError(); } - return decodeURI(identifier.path).slice(this.baseRequestURI.length); + return decodeUriPathComponents(identifier.path.slice(this.baseRequestURI.length)); } /** diff --git a/src/util/Util.ts b/src/util/Util.ts index 5a75aeaea..d28a26214 100644 --- a/src/util/Util.ts +++ b/src/util/Util.ts @@ -68,14 +68,19 @@ export const pipeStreamsAndErrors = (readable: Readable, des }; /** - * Converts a URL string to the "canonical" version that should be used internally for consistency. - * Decodes all percent encodings and then makes sure only the necessary characters are encoded again. + * Converts a URI path to the canonical version by splitting on slashes, + * decoding any percent-based encodings, + * and then encoding any special characters. */ -export const toCanonicalUrl = (url: string): string => { - const match = /(\w+:\/\/[^/]+\/)(.*)/u.exec(url); - if (!match) { - throw new UnsupportedHttpError(`Invalid URL ${url}`); - } - const [ , domain, path ] = match; - return encodeURI(domain + path.split('/').map(decodeURIComponent).join('/')); -}; +export const toCanonicalUriPath = (path: string): string => path.split('/').map((part): string => + encodeURIComponent(decodeURIComponent(part))).join('/'); + +/** + * Decodes all components of a URI path. + */ +export const decodeUriPathComponents = (path: string): string => path.split('/').map(decodeURIComponent).join('/'); + +/** + * Encodes all (non-slash) special characters in a URI path. + */ +export const encodeUriPathComponents = (path: string): string => path.split('/').map(encodeURIComponent).join('/'); diff --git a/test/unit/ldp/http/BasicTargetExtractor.test.ts b/test/unit/ldp/http/BasicTargetExtractor.test.ts index db79a4bfc..674fcf9e8 100644 --- a/test/unit/ldp/http/BasicTargetExtractor.test.ts +++ b/test/unit/ldp/http/BasicTargetExtractor.test.ts @@ -26,8 +26,14 @@ describe('A BasicTargetExtractor', (): void => { )).resolves.toEqual({ path: 'https://test.com/url' }); }); - it('decodes relevant percent encodings.', async(): Promise => { + it('encodes relevant characters.', async(): Promise => { await expect(extractor.handle({ url: '/a%20path%26/name', headers: { host: 'test.com' }} as any)) - .resolves.toEqual({ path: 'http://test.com/a%20path&/name' }); + .resolves.toEqual({ path: 'http://test.com/a%20path%26/name' }); + + await expect(extractor.handle({ url: '/a path%26/name', headers: { host: 'test.com' }} as any)) + .resolves.toEqual({ path: 'http://test.com/a%20path%26/name' }); + + await expect(extractor.handle({ url: '/path&%26/name', headers: { host: 'test.com' }} as any)) + .resolves.toEqual({ path: 'http://test.com/path%26%26/name' }); }); }); diff --git a/test/unit/util/Util.test.ts b/test/unit/util/Util.test.ts index c196b589f..ab903016f 100644 --- a/test/unit/util/Util.test.ts +++ b/test/unit/util/Util.test.ts @@ -1,6 +1,12 @@ import streamifyArray from 'streamify-array'; -import { UnsupportedHttpError } from '../../../src/util/errors/UnsupportedHttpError'; -import { ensureTrailingSlash, matchingMediaType, readableToString, toCanonicalUrl } from '../../../src/util/Util'; +import { + decodeUriPathComponents, + encodeUriPathComponents, + ensureTrailingSlash, + matchingMediaType, + readableToString, + toCanonicalUriPath, +} from '../../../src/util/Util'; describe('Util function', (): void => { describe('ensureTrailingSlash', (): void => { @@ -33,14 +39,17 @@ describe('Util function', (): void => { }); }); - describe('toCanonicalUrl', (): void => { - it('makes sure only the necessary parts are encoded.', async(): Promise => { - expect(toCanonicalUrl('http://test.com/a%20path%26/name')) - .toEqual('http://test.com/a%20path&/name'); + describe('UriPath functions', (): void => { + it('makes sure only the necessary parts are encoded with toCanonicalUriPath.', async(): Promise => { + expect(toCanonicalUriPath('/a%20path&/name')).toEqual('/a%20path%26/name'); }); - it('errors on invalid URLs.', async(): Promise => { - expect((): any => toCanonicalUrl('notAnUrl')).toThrow(new UnsupportedHttpError('Invalid URL notAnUrl')); + it('decodes all parts of a path with decodeUriPathComponents.', async(): Promise => { + expect(decodeUriPathComponents('/a%20path&/name')).toEqual('/a path&/name'); + }); + + it('encodes all parts of a path with encodeUriPathComponents.', async(): Promise => { + expect(encodeUriPathComponents('/a%20path&/name')).toEqual('/a%2520path%26/name'); }); }); });