diff --git a/src/util/PathUtil.ts b/src/util/PathUtil.ts index 46541fa8f..b5745f351 100644 --- a/src/util/PathUtil.ts +++ b/src/util/PathUtil.ts @@ -5,45 +5,6 @@ import type { ResourceIdentifier } from '../http/representation/ResourceIdentifi import type { HttpRequest } from '../server/HttpRequest'; import { BadRequestHttpError } from './errors/BadRequestHttpError'; -// Characters to ignore when URL decoding the URI path to a file path. -const pathComponentDelimiters = [ '/', '\\' ]; -// Regex to find all instances of encoded path component delimiters. -const encodedDelimiterRegex = new RegExp( - `${pathComponentDelimiters.map((delimiter): string => encodeURIComponent(delimiter)).join('|')}`, 'giu', -); -// Mapping of the replacements to perform in the preventDelimiterDecoding helper function. -const preventDelimiterDecodingMap = Object.fromEntries(pathComponentDelimiters.map((delimiter): [string, string] => { - const encodedDelimiter = encodeURIComponent(delimiter); - return [ encodedDelimiter, encodeURIComponent(encodedDelimiter) ]; -})); -// Mapping of the replacements to perform in the preventDelimiterEncoding helper function. -const preventDelimiterEncodingMap = Object.fromEntries(pathComponentDelimiters.map((delimiter): [string, string] => { - const encodedDelimiter = encodeURIComponent(delimiter); - return [ encodedDelimiter, delimiter ]; -})); - -/** - * Prevents some characters from being URL decoded by escaping them. - * The characters to 'escape' are declared in codecExceptions. - * - * @param pathComponent - The path component to apply the escaping on. - * @returns A copy of the input path that is safe to apply URL decoding on. - */ -function preventDelimiterDecoding(pathComponent: string): string { - return pathComponent.replace(encodedDelimiterRegex, (delimiter): string => preventDelimiterDecodingMap[delimiter]); -} - -/** - * Prevents some characters from being URL encoded by escaping them. - * The characters to 'escape' are declared in codecExceptions. - * - * @param pathComponent - The path component to apply the escaping on. - * @returns A copy of the input path that is safe to apply URL encoding on. - */ -function preventDelimiterEncoding(pathComponent: string): string { - return pathComponent.replace(encodedDelimiterRegex, (delimiter): string => preventDelimiterEncodingMap[delimiter]); -} - /** * Changes a potential Windows path into a POSIX path. * @@ -146,11 +107,25 @@ export function getExtension(path: string): string { } /** - * Performs a transformation on the path components of a URI. + * Performs a transformation on the path components of a URI, + * preserving but normalizing path delimiters and their escaped forms. */ function transformPathComponents(path: string, transform: (part: string) => string): string { const [ , base, queryString ] = /^([^?]*)(.*)$/u.exec(path)!; - const transformed = base.split('/').map((element): string => transform(element)).join('/'); + const transformed = base + // We split on actual URI path component delimiters (slash and backslash), + // but also on things that could be wrongly interpreted as component delimiters, + // such that they cannot be transformed incorrectly. + // We thus ensure that encoded slashes (%2F) and backslashes (%5C) are preserved, + // since they would become _actual_ delimiters if accidentally decoded. + // Additionally, we need to preserve any encoded percent signs (%25) + // that precede them, because these might change their interpretation as well. + .split(/(\/|\\|%(?:25)*(?:2f|5c))/ui) + // Even parts map to components that need to be transformed, + // odd parts to (possibly escaped) delimiters that need to be normalized. + .map((part, index): string => + index % 2 === 0 ? transform(part) : part.toUpperCase()) + .join(''); return !queryString ? transformed : `${transformed}${queryString}`; } @@ -177,8 +152,7 @@ export function toCanonicalUriPath(path: string): string { * @returns A decoded copy of the provided URI path (ignoring encoded slash characters). */ export function decodeUriPathComponents(path: string): string { - return transformPathComponents(path, (part): string => - decodeURIComponent(preventDelimiterDecoding(part))); + return transformPathComponents(path, decodeURIComponent); } /** @@ -190,8 +164,7 @@ export function decodeUriPathComponents(path: string): string { * @returns An encoded copy of the provided URI path (ignoring encoded slash characters). */ export function encodeUriPathComponents(path: string): string { - return transformPathComponents(path, (part): string => - encodeURIComponent(preventDelimiterEncoding(part))); + return transformPathComponents(path, encodeURIComponent); } /** diff --git a/test/integration/FileBackendEncodedSlashHandling.test.ts b/test/integration/FileBackendEncodedSlashHandling.test.ts index a4e6293ca..ad09a3992 100644 --- a/test/integration/FileBackendEncodedSlashHandling.test.ts +++ b/test/integration/FileBackendEncodedSlashHandling.test.ts @@ -39,12 +39,12 @@ describe('A server with a file backend storage', (): void => { }); afterAll(async(): Promise => { - // Await removeFolder(rootFilePath); + await removeFolder(rootFilePath); await app.stop(); }); - it('can put a document for which the URI path contains url encoded separator characters.', async(): Promise => { - const url = `${baseUrl}/c1/c2/t1%2F`; + it('can put a document for which the URI path contains URL-encoded separator characters.', async(): Promise => { + const url = `${baseUrl}c1/c2/t1%2f`; const res = await fetch(url, { method: 'PUT', headers: { @@ -53,10 +53,10 @@ describe('A server with a file backend storage', (): void => { body: 'abc', }); expect(res.status).toBe(201); - expect(res.headers.get('location')).toBe(url); + expect(res.headers.get('location')).toBe(`${baseUrl}c1/c2/t1%2F`); - // The resource should not be accessible through ${baseUrl}/c1/c2/t1/. - const check1 = await fetch(`${baseUrl}/c1/c2/t1/}`, { + // The resource should not be accessible through ${baseUrl}c1/c2/t1/. + const check1 = await fetch(`${baseUrl}c1/c2/t1/}`, { method: 'GET', headers: { accept: 'text/plain', @@ -79,7 +79,7 @@ describe('A server with a file backend storage', (): void => { expect(check3).toBe(true); }); - it('can post a document using a slug that contains url encoded separator characters.', async(): Promise => { + it('can post a document using a slug that contains URL-encoded separator characters.', async(): Promise => { const slug = 't1%2Faa'; const res = await fetch(baseUrl, { method: 'POST', @@ -92,15 +92,15 @@ describe('A server with a file backend storage', (): void => { expect(res.status).toBe(201); expect(res.headers.get('location')).toBe(`${baseUrl}${slug}`); - // Check that the the appropriate file path exists + // Check that the appropriate file path exists const check = await pathExists(`${rootFilePath}/${slug}$.txt`); expect(check).toBe(true); }); - it('prevents accessing a document via a different identifier that results in the same path after url decoding.', + it('prevents accessing a document via a different identifier that results in the same path after URL decoding.', async(): Promise => { // First put a resource using a path without encoded separator characters: foo/bar - const url = `${baseUrl}/foo/bar`; + const url = `${baseUrl}foo/bar`; await fetch(url, { method: 'PUT', headers: { @@ -110,7 +110,7 @@ describe('A server with a file backend storage', (): void => { }); // The resource at foo/bar should not be accessible using the url encoded variant of this path: foo%2Fbar - const check1 = await fetch(`${baseUrl}/foo%2Fbar`, { + const check1 = await fetch(`${baseUrl}foo%2Fbar`, { method: 'GET', headers: { accept: 'text/plain', @@ -124,7 +124,7 @@ describe('A server with a file backend storage', (): void => { expect(check2).toBe(true); // Next, put a resource using a path with an encoded separator character: bar%2Ffoo - await fetch(`${baseUrl}/bar%2Ffoo`, { + await fetch(`${baseUrl}bar%2Ffoo`, { method: 'PUT', headers: { 'content-type': 'text/plain', @@ -133,7 +133,7 @@ describe('A server with a file backend storage', (): void => { }); // The resource at bar%2Ffoo should not be accessible through bar/foo - const check3 = await fetch(`${baseUrl}/bar/foo`, { + const check3 = await fetch(`${baseUrl}bar/foo`, { method: 'GET', headers: { accept: 'text/plain', diff --git a/test/unit/util/PathUtil.test.ts b/test/unit/util/PathUtil.test.ts index 2ae158e58..4eb681a26 100644 --- a/test/unit/util/PathUtil.test.ts +++ b/test/unit/util/PathUtil.test.ts @@ -113,9 +113,30 @@ describe('PathUtil', (): void => { expect(decodeUriPathComponents('/a%20path&/name?abc=def&xyz')).toBe('/a path&/name?abc=def&xyz'); }); - it('ignores url encoded path separator characters.', (): void => { + it('ignores URL-encoded path separator characters.', (): void => { expect(decodeUriPathComponents('/a%20path&/c1/c2/t1%2F')).toBe('/a path&/c1/c2/t1%2F'); expect(decodeUriPathComponents('/a%20path&/c1/c2/t1%5C')).toBe('/a path&/c1/c2/t1%5C'); + expect(decodeUriPathComponents('/a%20path&/c1/c2/t1%252F')).toBe('/a path&/c1/c2/t1%252F'); + expect(decodeUriPathComponents('/a%20path&/c1/c2/t1%255C')).toBe('/a path&/c1/c2/t1%255C'); + expect(decodeUriPathComponents('/a%20path&/c1/c2/t1%25%252F')).toBe('/a path&/c1/c2/t1%%252F'); + expect(decodeUriPathComponents('/a%20path&/c1/c2/t1%25%255C')).toBe('/a path&/c1/c2/t1%%255C'); + }); + + it('normalizes to uppercase encoding.', (): void => { + expect(decodeUriPathComponents('/a%20path&/c1/c2/t1%2f')).toBe('/a path&/c1/c2/t1%2F'); + expect(decodeUriPathComponents('/a%20path&/c1/c2/t1%5c')).toBe('/a path&/c1/c2/t1%5C'); + }); + + it('accepts paths with mixed lowercase and uppercase encoding.', (): void => { + expect(decodeUriPathComponents('/a%20path&/c1/c2/t1%2F%2f')).toBe('/a path&/c1/c2/t1%2F%2F'); + expect(decodeUriPathComponents('/a%20path&/c1/c2/t1%5C%5c')).toBe('/a path&/c1/c2/t1%5C%5C'); + }); + + it('takes sequences of encoded percent signs into account.', (): void => { + expect(decodeUriPathComponents('/a%2Fb')).toBe('/a%2Fb'); + expect(decodeUriPathComponents('/a%252Fb')).toBe('/a%252Fb'); + expect(decodeUriPathComponents('/a%25252Fb')).toBe('/a%25252Fb'); + expect(decodeUriPathComponents('/a%2525252Fb')).toBe('/a%2525252Fb'); }); }); @@ -128,9 +149,30 @@ describe('PathUtil', (): void => { expect(encodeUriPathComponents('/a%20path&/name?abc=def&xyz')).toBe('/a%2520path%26/name?abc=def&xyz'); }); - it('does not double-encode url encoded path separator characters.', (): void => { + it('does not double-encode URL-encoded path separator characters.', (): void => { expect(encodeUriPathComponents('/a%20path&/c1/c2/t1%2F')).toBe('/a%2520path%26/c1/c2/t1%2F'); expect(encodeUriPathComponents('/a%20path&/c1/c2/t1%5C')).toBe('/a%2520path%26/c1/c2/t1%5C'); + expect(encodeUriPathComponents('/a%20path&/c1/c2/t1%252F')).toBe('/a%2520path%26/c1/c2/t1%252F'); + expect(encodeUriPathComponents('/a%20path&/c1/c2/t1%255C')).toBe('/a%2520path%26/c1/c2/t1%255C'); + expect(encodeUriPathComponents('/a%20path&/c1/c2/t1%%252F')).toBe('/a%2520path%26/c1/c2/t1%25%252F'); + expect(encodeUriPathComponents('/a%20path&/c1/c2/t1%%255C')).toBe('/a%2520path%26/c1/c2/t1%25%255C'); + }); + + it('normalizes to uppercase encoding.', (): void => { + expect(encodeUriPathComponents('/a%20path&/c1/c2/t1%2f')).toBe('/a%2520path%26/c1/c2/t1%2F'); + expect(encodeUriPathComponents('/a%20path&/c1/c2/t1%5c')).toBe('/a%2520path%26/c1/c2/t1%5C'); + }); + + it('accepts paths with mixed lowercase and uppercase encoding.', (): void => { + expect(encodeUriPathComponents('/a%20path&/c1/c2/t1%2F%2f')).toBe('/a%2520path%26/c1/c2/t1%2F%2F'); + expect(encodeUriPathComponents('/a%20path&/c1/c2/t1%5C%5c')).toBe('/a%2520path%26/c1/c2/t1%5C%5C'); + }); + + it('takes sequences of encoded percent signs into account.', (): void => { + expect(encodeUriPathComponents('/a%2Fb')).toBe('/a%2Fb'); + expect(encodeUriPathComponents('/a%252Fb')).toBe('/a%252Fb'); + expect(encodeUriPathComponents('/a%25252Fb')).toBe('/a%25252Fb'); + expect(encodeUriPathComponents('/a%2525252Fb')).toBe('/a%2525252Fb'); }); });