fix: Make delimiter encoding case-insensitive.

2024-10-03 14:55:10 +00:00 · 2022-04-22 00:16:49 +02:00
parent dbdb9b424e
commit 50469e2c1f
3 changed files with 75 additions and 60 deletions
--- a/src/util/PathUtil.ts
+++ b/src/util/PathUtil.ts
@@ -5,45 +5,6 @@ import type { ResourceIdentifier } from '../http/representation/ResourceIdentifi
 import type { HttpRequest } from '../server/HttpRequest';
 import { BadRequestHttpError } from './errors/BadRequestHttpError';

-// Characters to ignore when URL decoding the URI path to a file path.
-const pathComponentDelimiters = [ '/', '\\' ];
-// Regex to find all instances of encoded path component delimiters.
-const encodedDelimiterRegex = new RegExp(
-  `${pathComponentDelimiters.map((delimiter): string => encodeURIComponent(delimiter)).join('|')}`, 'giu',
-);
-// Mapping of the replacements to perform in the preventDelimiterDecoding helper function.
-const preventDelimiterDecodingMap = Object.fromEntries(pathComponentDelimiters.map((delimiter): [string, string] => {
-  const encodedDelimiter = encodeURIComponent(delimiter);
-  return [ encodedDelimiter, encodeURIComponent(encodedDelimiter) ];
-}));
-// Mapping of the replacements to perform in the preventDelimiterEncoding helper function.
-const preventDelimiterEncodingMap = Object.fromEntries(pathComponentDelimiters.map((delimiter): [string, string] => {
-  const encodedDelimiter = encodeURIComponent(delimiter);
-  return [ encodedDelimiter, delimiter ];
-}));
-
-/**
- * Prevents some characters from being URL decoded by escaping them.
- * The characters to 'escape' are declared in codecExceptions.
- *
- * @param pathComponent - The path component to apply the escaping on.
- * @returns A copy of the input path that is safe to apply URL decoding on.
- */
-function preventDelimiterDecoding(pathComponent: string): string {
-  return pathComponent.replace(encodedDelimiterRegex, (delimiter): string => preventDelimiterDecodingMap[delimiter]);
-}
-
-/**
- * Prevents some characters from being URL encoded by escaping them.
- * The characters to 'escape' are declared in codecExceptions.
- *
- * @param pathComponent - The path component to apply the escaping on.
- * @returns A copy of the input path that is safe to apply URL encoding on.
- */
-function preventDelimiterEncoding(pathComponent: string): string {
-  return pathComponent.replace(encodedDelimiterRegex, (delimiter): string => preventDelimiterEncodingMap[delimiter]);
-}
-
 /**
 * Changes a potential Windows path into a POSIX path.
 *
@@ -146,11 +107,25 @@ export function getExtension(path: string): string {
 }

 /**
- * Performs a transformation on the path components of a URI.
+ * Performs a transformation on the path components of a URI,
+ * preserving but normalizing path delimiters and their escaped forms.
 */
 function transformPathComponents(path: string, transform: (part: string) => string): string {
  const [ , base, queryString ] = /^([^?]*)(.*)$/u.exec(path)!;
-  const transformed = base.split('/').map((element): string => transform(element)).join('/');
+  const transformed = base
+    // We split on actual URI path component delimiters (slash and backslash),
+    // but also on things that could be wrongly interpreted as component delimiters,
+    // such that they cannot be transformed incorrectly.
+    // We thus ensure that encoded slashes (%2F) and backslashes (%5C) are preserved,
+    // since they would become _actual_ delimiters if accidentally decoded.
+    // Additionally, we need to preserve any encoded percent signs (%25)
+    // that precede them, because these might change their interpretation as well.
+    .split(/(\/|\\|%(?:25)*(?:2f|5c))/ui)
+    // Even parts map to components that need to be transformed,
+    // odd parts to (possibly escaped) delimiters that need to be normalized.
+    .map((part, index): string =>
+      index % 2 === 0 ? transform(part) : part.toUpperCase())
+    .join('');
  return !queryString ? transformed : `${transformed}${queryString}`;
 }

@@ -177,8 +152,7 @@ export function toCanonicalUriPath(path: string): string {
 * @returns A decoded copy of the provided URI path (ignoring encoded slash characters).
 */
 export function decodeUriPathComponents(path: string): string {
-  return transformPathComponents(path, (part): string =>
-    decodeURIComponent(preventDelimiterDecoding(part)));
+  return transformPathComponents(path, decodeURIComponent);
 }

 /**
@@ -190,8 +164,7 @@ export function decodeUriPathComponents(path: string): string {
 * @returns An encoded copy of the provided URI path (ignoring encoded slash characters).
 */
 export function encodeUriPathComponents(path: string): string {
-  return transformPathComponents(path, (part): string =>
-    encodeURIComponent(preventDelimiterEncoding(part)));
+  return transformPathComponents(path, encodeURIComponent);
 }

 /**
--- a/test/integration/FileBackendEncodedSlashHandling.test.ts
+++ b/test/integration/FileBackendEncodedSlashHandling.test.ts
@@ -39,12 +39,12 @@ describe('A server with a file backend storage', (): void => {
  });

  afterAll(async(): Promise<void> => {
-    // Await removeFolder(rootFilePath);
+    await removeFolder(rootFilePath);
    await app.stop();
  });

-  it('can put a document for which the URI path contains url encoded separator characters.', async(): Promise<void> => {
-    const url = `${baseUrl}/c1/c2/t1%2F`;
+  it('can put a document for which the URI path contains URL-encoded separator characters.', async(): Promise<void> => {
+    const url = `${baseUrl}c1/c2/t1%2f`;
    const res = await fetch(url, {
      method: 'PUT',
      headers: {
@@ -53,10 +53,10 @@ describe('A server with a file backend storage', (): void => {
      body: 'abc',
    });
    expect(res.status).toBe(201);
-    expect(res.headers.get('location')).toBe(url);
+    expect(res.headers.get('location')).toBe(`${baseUrl}c1/c2/t1%2F`);

-    // The resource should not be accessible through ${baseUrl}/c1/c2/t1/.
-    const check1 = await fetch(`${baseUrl}/c1/c2/t1/}`, {
+    // The resource should not be accessible through ${baseUrl}c1/c2/t1/.
+    const check1 = await fetch(`${baseUrl}c1/c2/t1/}`, {
      method: 'GET',
      headers: {
        accept: 'text/plain',
@@ -79,7 +79,7 @@ describe('A server with a file backend storage', (): void => {
    expect(check3).toBe(true);
  });

-  it('can post a document using a slug that contains url encoded separator characters.', async(): Promise<void> => {
+  it('can post a document using a slug that contains URL-encoded separator characters.', async(): Promise<void> => {
    const slug = 't1%2Faa';
    const res = await fetch(baseUrl, {
      method: 'POST',
@@ -92,15 +92,15 @@ describe('A server with a file backend storage', (): void => {
    expect(res.status).toBe(201);
    expect(res.headers.get('location')).toBe(`${baseUrl}${slug}`);

-    // Check that the the appropriate file path exists
+    // Check that the appropriate file path exists
    const check = await pathExists(`${rootFilePath}/${slug}$.txt`);
    expect(check).toBe(true);
  });

-  it('prevents accessing a document via a different identifier that results in the same path after url decoding.',
+  it('prevents accessing a document via a different identifier that results in the same path after URL decoding.',
    async(): Promise<void> => {
      // First put a resource using a path without encoded separator characters: foo/bar
-      const url = `${baseUrl}/foo/bar`;
+      const url = `${baseUrl}foo/bar`;
      await fetch(url, {
        method: 'PUT',
        headers: {
@@ -110,7 +110,7 @@ describe('A server with a file backend storage', (): void => {
      });

      // The resource at foo/bar should not be accessible using the url encoded variant of this path: foo%2Fbar
-      const check1 = await fetch(`${baseUrl}/foo%2Fbar`, {
+      const check1 = await fetch(`${baseUrl}foo%2Fbar`, {
        method: 'GET',
        headers: {
          accept: 'text/plain',
@@ -124,7 +124,7 @@ describe('A server with a file backend storage', (): void => {
      expect(check2).toBe(true);

      // Next, put a resource using a path with an encoded separator character: bar%2Ffoo
-      await fetch(`${baseUrl}/bar%2Ffoo`, {
+      await fetch(`${baseUrl}bar%2Ffoo`, {
        method: 'PUT',
        headers: {
          'content-type': 'text/plain',
@@ -133,7 +133,7 @@ describe('A server with a file backend storage', (): void => {
      });

      // The resource at bar%2Ffoo should not be accessible through bar/foo
-      const check3 = await fetch(`${baseUrl}/bar/foo`, {
+      const check3 = await fetch(`${baseUrl}bar/foo`, {
        method: 'GET',
        headers: {
          accept: 'text/plain',
--- a/test/unit/util/PathUtil.test.ts
+++ b/test/unit/util/PathUtil.test.ts
@@ -113,9 +113,30 @@ describe('PathUtil', (): void => {
      expect(decodeUriPathComponents('/a%20path&/name?abc=def&xyz')).toBe('/a path&/name?abc=def&xyz');
    });

-    it('ignores url encoded path separator characters.', (): void => {
+    it('ignores URL-encoded path separator characters.', (): void => {
      expect(decodeUriPathComponents('/a%20path&/c1/c2/t1%2F')).toBe('/a path&/c1/c2/t1%2F');
      expect(decodeUriPathComponents('/a%20path&/c1/c2/t1%5C')).toBe('/a path&/c1/c2/t1%5C');
+      expect(decodeUriPathComponents('/a%20path&/c1/c2/t1%252F')).toBe('/a path&/c1/c2/t1%252F');
+      expect(decodeUriPathComponents('/a%20path&/c1/c2/t1%255C')).toBe('/a path&/c1/c2/t1%255C');
+      expect(decodeUriPathComponents('/a%20path&/c1/c2/t1%25%252F')).toBe('/a path&/c1/c2/t1%%252F');
+      expect(decodeUriPathComponents('/a%20path&/c1/c2/t1%25%255C')).toBe('/a path&/c1/c2/t1%%255C');
+    });
+
+    it('normalizes to uppercase encoding.', (): void => {
+      expect(decodeUriPathComponents('/a%20path&/c1/c2/t1%2f')).toBe('/a path&/c1/c2/t1%2F');
+      expect(decodeUriPathComponents('/a%20path&/c1/c2/t1%5c')).toBe('/a path&/c1/c2/t1%5C');
+    });
+
+    it('accepts paths with mixed lowercase and uppercase encoding.', (): void => {
+      expect(decodeUriPathComponents('/a%20path&/c1/c2/t1%2F%2f')).toBe('/a path&/c1/c2/t1%2F%2F');
+      expect(decodeUriPathComponents('/a%20path&/c1/c2/t1%5C%5c')).toBe('/a path&/c1/c2/t1%5C%5C');
+    });
+
+    it('takes sequences of encoded percent signs into account.', (): void => {
+      expect(decodeUriPathComponents('/a%2Fb')).toBe('/a%2Fb');
+      expect(decodeUriPathComponents('/a%252Fb')).toBe('/a%252Fb');
+      expect(decodeUriPathComponents('/a%25252Fb')).toBe('/a%25252Fb');
+      expect(decodeUriPathComponents('/a%2525252Fb')).toBe('/a%2525252Fb');
    });
  });

@@ -128,9 +149,30 @@ describe('PathUtil', (): void => {
      expect(encodeUriPathComponents('/a%20path&/name?abc=def&xyz')).toBe('/a%2520path%26/name?abc=def&xyz');
    });

-    it('does not double-encode url encoded path separator characters.', (): void => {
+    it('does not double-encode URL-encoded path separator characters.', (): void => {
      expect(encodeUriPathComponents('/a%20path&/c1/c2/t1%2F')).toBe('/a%2520path%26/c1/c2/t1%2F');
      expect(encodeUriPathComponents('/a%20path&/c1/c2/t1%5C')).toBe('/a%2520path%26/c1/c2/t1%5C');
+      expect(encodeUriPathComponents('/a%20path&/c1/c2/t1%252F')).toBe('/a%2520path%26/c1/c2/t1%252F');
+      expect(encodeUriPathComponents('/a%20path&/c1/c2/t1%255C')).toBe('/a%2520path%26/c1/c2/t1%255C');
+      expect(encodeUriPathComponents('/a%20path&/c1/c2/t1%%252F')).toBe('/a%2520path%26/c1/c2/t1%25%252F');
+      expect(encodeUriPathComponents('/a%20path&/c1/c2/t1%%255C')).toBe('/a%2520path%26/c1/c2/t1%25%255C');
+    });
+
+    it('normalizes to uppercase encoding.', (): void => {
+      expect(encodeUriPathComponents('/a%20path&/c1/c2/t1%2f')).toBe('/a%2520path%26/c1/c2/t1%2F');
+      expect(encodeUriPathComponents('/a%20path&/c1/c2/t1%5c')).toBe('/a%2520path%26/c1/c2/t1%5C');
+    });
+
+    it('accepts paths with mixed lowercase and uppercase encoding.', (): void => {
+      expect(encodeUriPathComponents('/a%20path&/c1/c2/t1%2F%2f')).toBe('/a%2520path%26/c1/c2/t1%2F%2F');
+      expect(encodeUriPathComponents('/a%20path&/c1/c2/t1%5C%5c')).toBe('/a%2520path%26/c1/c2/t1%5C%5C');
+    });
+
+    it('takes sequences of encoded percent signs into account.', (): void => {
+      expect(encodeUriPathComponents('/a%2Fb')).toBe('/a%2Fb');
+      expect(encodeUriPathComponents('/a%252Fb')).toBe('/a%252Fb');
+      expect(encodeUriPathComponents('/a%25252Fb')).toBe('/a%25252Fb');
+      expect(encodeUriPathComponents('/a%2525252Fb')).toBe('/a%2525252Fb');
    });
  });