diff --git a/src/util/PathUtil.ts b/src/util/PathUtil.ts index aa2f8a830..46541fa8f 100644 --- a/src/util/PathUtil.ts +++ b/src/util/PathUtil.ts @@ -5,6 +5,45 @@ import type { ResourceIdentifier } from '../http/representation/ResourceIdentifi import type { HttpRequest } from '../server/HttpRequest'; import { BadRequestHttpError } from './errors/BadRequestHttpError'; +// Characters to ignore when URL decoding the URI path to a file path. +const pathComponentDelimiters = [ '/', '\\' ]; +// Regex to find all instances of encoded path component delimiters. +const encodedDelimiterRegex = new RegExp( + `${pathComponentDelimiters.map((delimiter): string => encodeURIComponent(delimiter)).join('|')}`, 'giu', +); +// Mapping of the replacements to perform in the preventDelimiterDecoding helper function. +const preventDelimiterDecodingMap = Object.fromEntries(pathComponentDelimiters.map((delimiter): [string, string] => { + const encodedDelimiter = encodeURIComponent(delimiter); + return [ encodedDelimiter, encodeURIComponent(encodedDelimiter) ]; +})); +// Mapping of the replacements to perform in the preventDelimiterEncoding helper function. +const preventDelimiterEncodingMap = Object.fromEntries(pathComponentDelimiters.map((delimiter): [string, string] => { + const encodedDelimiter = encodeURIComponent(delimiter); + return [ encodedDelimiter, delimiter ]; +})); + +/** + * Prevents some characters from being URL decoded by escaping them. + * The characters to 'escape' are declared in codecExceptions. + * + * @param pathComponent - The path component to apply the escaping on. + * @returns A copy of the input path that is safe to apply URL decoding on. + */ +function preventDelimiterDecoding(pathComponent: string): string { + return pathComponent.replace(encodedDelimiterRegex, (delimiter): string => preventDelimiterDecodingMap[delimiter]); +} + +/** + * Prevents some characters from being URL encoded by escaping them. + * The characters to 'escape' are declared in codecExceptions. + * + * @param pathComponent - The path component to apply the escaping on. + * @returns A copy of the input path that is safe to apply URL encoding on. + */ +function preventDelimiterEncoding(pathComponent: string): string { + return pathComponent.replace(encodedDelimiterRegex, (delimiter): string => preventDelimiterEncodingMap[delimiter]); +} + /** * Changes a potential Windows path into a POSIX path. * @@ -118,6 +157,11 @@ function transformPathComponents(path: string, transform: (part: string) => stri /** * Converts a URI path to the canonical version by splitting on slashes, * decoding any percent-based encodings, and then encoding any special characters. + * This function is used to clean unwanted characters in the components of + * the provided path. + * + * @param path - The path to convert to its canonical URI path form. + * @returns The canonical URI path form of the provided path. */ export function toCanonicalUriPath(path: string): string { return transformPathComponents(path, (part): string => @@ -125,17 +169,29 @@ export function toCanonicalUriPath(path: string): string { } /** - * Decodes all components of a URI path. + * This function is used when converting a URI to a file path. Decodes all components of a URI path, + * with the exception of encoded slash characters, as this would lead to unexpected file locations + * being targeted (resulting in erroneous behaviour of the file based backend). + * + * @param path - The path to decode the URI path components of. + * @returns A decoded copy of the provided URI path (ignoring encoded slash characters). */ export function decodeUriPathComponents(path: string): string { - return transformPathComponents(path, decodeURIComponent); + return transformPathComponents(path, (part): string => + decodeURIComponent(preventDelimiterDecoding(part))); } /** - * Encodes all (non-slash) special characters in a URI path. + * This function is used in the process of converting a file path to a URI. Encodes all (non-slash) + * special characters in a URI path, with the exception of encoded slash characters, as this would + * lead to unnecessary double encoding, resulting in a URI that differs from the expected result. + * + * @param path - The path to encode the URI path components of. + * @returns An encoded copy of the provided URI path (ignoring encoded slash characters). */ export function encodeUriPathComponents(path: string): string { - return transformPathComponents(path, encodeURIComponent); + return transformPathComponents(path, (part): string => + encodeURIComponent(preventDelimiterEncoding(part))); } /** diff --git a/test/integration/FileBackendEncodedSlashHandling.test.ts b/test/integration/FileBackendEncodedSlashHandling.test.ts new file mode 100644 index 000000000..a4e6293ca --- /dev/null +++ b/test/integration/FileBackendEncodedSlashHandling.test.ts @@ -0,0 +1,149 @@ +import fetch from 'cross-fetch'; +import { pathExists } from 'fs-extra'; +import type { App } from '../../src/init/App'; +import { getPort } from '../util/Util'; +import { + getDefaultVariables, + getTestConfigPath, + getTestFolder, + instantiateFromConfig, + removeFolder, +} from './Config'; + +const port = getPort('FileBackendEncodedSlashHandling'); +const baseUrl = `http://localhost:${port}/`; + +const rootFilePath = getTestFolder('file-backend-encoded-slash-handling'); + +describe('A server with a file backend storage', (): void => { + let app: App; + + beforeAll(async(): Promise => { + await removeFolder(rootFilePath); + const variables = { + ...getDefaultVariables(port, baseUrl), + 'urn:solid-server:default:variable:rootFilePath': rootFilePath, + }; + + // Create and start the server + const instances = await instantiateFromConfig( + 'urn:solid-server:test:Instances', + [ + getTestConfigPath('server-file.json'), + ], + variables, + ) as Record; + ({ app } = instances); + + await app.start(); + }); + + afterAll(async(): Promise => { + // Await removeFolder(rootFilePath); + await app.stop(); + }); + + it('can put a document for which the URI path contains url encoded separator characters.', async(): Promise => { + const url = `${baseUrl}/c1/c2/t1%2F`; + const res = await fetch(url, { + method: 'PUT', + headers: { + 'content-type': 'text/plain', + }, + body: 'abc', + }); + expect(res.status).toBe(201); + expect(res.headers.get('location')).toBe(url); + + // The resource should not be accessible through ${baseUrl}/c1/c2/t1/. + const check1 = await fetch(`${baseUrl}/c1/c2/t1/}`, { + method: 'GET', + headers: { + accept: 'text/plain', + }, + }); + expect(check1.status).toBe(404); + + // Check that the created resource is not a container + const check2 = await fetch(url, { + method: 'GET', + headers: { + accept: 'text/plain', + }, + }); + const linkHeaderValues = check2.headers.get('link')!.split(',').map((item): string => item.trim()); + expect(linkHeaderValues).not.toContain('; rel="type"'); + + // Check that the appropriate file path exists + const check3 = await pathExists(`${rootFilePath}/c1/c2/t1%2F$.txt`); + expect(check3).toBe(true); + }); + + it('can post a document using a slug that contains url encoded separator characters.', async(): Promise => { + const slug = 't1%2Faa'; + const res = await fetch(baseUrl, { + method: 'POST', + headers: { + 'content-type': 'text/plain', + slug, + }, + body: 'abc', + }); + expect(res.status).toBe(201); + expect(res.headers.get('location')).toBe(`${baseUrl}${slug}`); + + // Check that the the appropriate file path exists + const check = await pathExists(`${rootFilePath}/${slug}$.txt`); + expect(check).toBe(true); + }); + + it('prevents accessing a document via a different identifier that results in the same path after url decoding.', + async(): Promise => { + // First put a resource using a path without encoded separator characters: foo/bar + const url = `${baseUrl}/foo/bar`; + await fetch(url, { + method: 'PUT', + headers: { + 'content-type': 'text/plain', + }, + body: 'abc', + }); + + // The resource at foo/bar should not be accessible using the url encoded variant of this path: foo%2Fbar + const check1 = await fetch(`${baseUrl}/foo%2Fbar`, { + method: 'GET', + headers: { + accept: 'text/plain', + }, + }); + // Expect foo%2Fbar to correctly refer to a different document, which does not exist. + expect(check1.status).toBe(404); + + // Check that the the appropriate file path for foo/bar exists + const check2 = await pathExists(`${rootFilePath}/foo/bar$.txt`); + expect(check2).toBe(true); + + // Next, put a resource using a path with an encoded separator character: bar%2Ffoo + await fetch(`${baseUrl}/bar%2Ffoo`, { + method: 'PUT', + headers: { + 'content-type': 'text/plain', + }, + body: 'abc', + }); + + // The resource at bar%2Ffoo should not be accessible through bar/foo + const check3 = await fetch(`${baseUrl}/bar/foo`, { + method: 'GET', + headers: { + accept: 'text/plain', + }, + }); + // Expect bar/foo to correctly refer to a different document, which does not exist. + expect(check3.status).toBe(404); + + // Check that the the appropriate file path for bar%foo exists + const check4 = await pathExists(`${rootFilePath}/bar%2Ffoo$.txt`); + expect(check4).toBe(true); + }); +}); diff --git a/test/integration/config/server-file.json b/test/integration/config/server-file.json new file mode 100644 index 000000000..9b5fe98aa --- /dev/null +++ b/test/integration/config/server-file.json @@ -0,0 +1,54 @@ +{ + "@context": "https://linkedsoftwaredependencies.org/bundles/npm/@solid/community-server/^4.0.0/components/context.jsonld", + "import": [ + "files-scs:config/app/main/default.json", + "files-scs:config/app/init/initialize-root.json", + "files-scs:config/app/setup/disabled.json", + "files-scs:config/http/handler/default.json", + "files-scs:config/http/middleware/websockets.json", + "files-scs:config/http/server-factory/websockets.json", + "files-scs:config/http/static/default.json", + "files-scs:config/identity/access/public.json", + "files-scs:config/identity/handler/default.json", + "files-scs:config/identity/ownership/token.json", + "files-scs:config/identity/pod/static.json", + "files-scs:config/identity/registration/enabled.json", + "files-scs:config/ldp/authentication/dpop-bearer.json", + "files-scs:config/ldp/authorization/webacl.json", + "files-scs:config/ldp/handler/default.json", + "files-scs:config/ldp/metadata-parser/default.json", + "files-scs:config/ldp/metadata-writer/default.json", + "files-scs:config/ldp/modes/default.json", + "files-scs:config/storage/backend/file.json", + "files-scs:config/storage/key-value/resource-store.json", + "files-scs:config/storage/middleware/default.json", + "files-scs:config/util/auxiliary/acl.json", + "files-scs:config/util/identifiers/suffix.json", + "files-scs:config/util/index/default.json", + "files-scs:config/util/logging/winston.json", + "files-scs:config/util/representation-conversion/default.json", + "files-scs:config/util/resource-locker/memory.json", + "files-scs:config/util/variables/default.json" + ], + "@graph": [ + { + "@id": "urn:solid-server:test:Instances", + "@type": "RecordObject", + "RecordObject:_record": [ + { + "RecordObject:_record_key": "app", + "RecordObject:_record_value": { "@id": "urn:solid-server:default:App" } + } + ] + }, + { + "@id": "urn:solid-server:default:EmailSender", + "@type": "BaseEmailSender", + "args_senderName": "Solid Server", + "args_emailConfig_host": "smtp.example.email", + "args_emailConfig_port": 587, + "args_emailConfig_auth_user": "alice@example.email", + "args_emailConfig_auth_pass": "NYEaCsqV7aVStRCbmC" + } + ] +} diff --git a/test/unit/util/PathUtil.test.ts b/test/unit/util/PathUtil.test.ts index 0d5c49ee4..2ae158e58 100644 --- a/test/unit/util/PathUtil.test.ts +++ b/test/unit/util/PathUtil.test.ts @@ -112,6 +112,11 @@ describe('PathUtil', (): void => { it('leaves the query string untouched.', (): void => { expect(decodeUriPathComponents('/a%20path&/name?abc=def&xyz')).toBe('/a path&/name?abc=def&xyz'); }); + + it('ignores url encoded path separator characters.', (): void => { + expect(decodeUriPathComponents('/a%20path&/c1/c2/t1%2F')).toBe('/a path&/c1/c2/t1%2F'); + expect(decodeUriPathComponents('/a%20path&/c1/c2/t1%5C')).toBe('/a path&/c1/c2/t1%5C'); + }); }); describe('#encodeUriPathComponents', (): void => { @@ -122,6 +127,11 @@ describe('PathUtil', (): void => { it('leaves the query string untouched.', (): void => { expect(encodeUriPathComponents('/a%20path&/name?abc=def&xyz')).toBe('/a%2520path%26/name?abc=def&xyz'); }); + + it('does not double-encode url encoded path separator characters.', (): void => { + expect(encodeUriPathComponents('/a%20path&/c1/c2/t1%2F')).toBe('/a%2520path%26/c1/c2/t1%2F'); + expect(encodeUriPathComponents('/a%20path&/c1/c2/t1%5C')).toBe('/a%2520path%26/c1/c2/t1%5C'); + }); }); describe('#isContainerPath', (): void => { diff --git a/test/util/Util.ts b/test/util/Util.ts index e96f2c1af..c6563b3ef 100644 --- a/test/util/Util.ts +++ b/test/util/Util.ts @@ -8,6 +8,7 @@ const portNames = [ 'ContentNegotiation', 'DynamicPods', 'ExpiringDataCleanup', + 'FileBackendEncodedSlashHandling', 'GlobalQuota', 'Identity', 'LpdHandlerWithAuth',