From bdb3621ee33e513bf6b6086e502940433c118946 Mon Sep 17 00:00:00 2001
From: Joachim Van Herwegen
Date: Fri, 12 Feb 2021 11:45:14 +0100
Subject: [PATCH] feat: Create SubdomainExtensionBasedMapper

This is required for file backends when supporting identifiers containing subdomains.
---
 package-lock.json                             |  8 +-
 package.json                                  |  2 +
 src/index.ts                                  |  1 +
 src/storage/mapping/ExtensionBasedMapper.ts   | 18 +++-
 .../mapping/SubdomainExtensionBasedMapper.ts  | 93 +++++++++++++++++++
 src/util/PathUtil.ts                          | 29 ++++++
 .../SubdomainExtensionBasedMapper.test.ts     | 90 ++++++++++++++++++
 7 files changed, 234 insertions(+), 7 deletions(-)
 create mode 100644 src/storage/mapping/SubdomainExtensionBasedMapper.ts
 create mode 100644 test/unit/storage/mapping/SubdomainExtensionBasedMapper.test.ts

diff --git a/package-lock.json b/package-lock.json
index 20bfcac68..b2f28a7d3 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -1311,6 +1311,11 @@
         "@types/node": "*"
       }
     },
+    "@types/punycode": {
+      "version": "2.1.0",
+      "resolved": "https://registry.npmjs.org/@types/punycode/-/punycode-2.1.0.tgz",
+      "integrity": "sha512-PG5aLpW6PJOeV2fHRslP4IOMWn+G+Uq8CfnyJ+PDS8ndCbU+soO+fB3NKCKo0p/Jh2Y4aPaiQZsrOXFdzpcA6g=="
+    },
     "@types/rdf-js": {
       "version": "4.0.0",
       "resolved": "https://registry.npmjs.org/@types/rdf-js/-/rdf-js-4.0.0.tgz",
@@ -7554,8 +7559,7 @@
     "punycode": {
       "version": "2.1.1",
       "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.1.1.tgz",
-      "integrity": "sha512-XRsRjdf+j5ml+y/6GKHPZbrF/8p2Yga0JPtdqTIY2Xe5ohJPD9saDJJLPvp9+NSBprVvevdXZybnj2cv8OEd0A==",
-      "dev": true
+      "integrity": "sha512-XRsRjdf+j5ml+y/6GKHPZbrF/8p2Yga0JPtdqTIY2Xe5ohJPD9saDJJLPvp9+NSBprVvevdXZybnj2cv8OEd0A=="
     },
     "pupa": {
       "version": "2.1.1",
diff --git a/package.json b/package.json
index e05c66176..9e80e1d05 100644
--- a/package.json
+++ b/package.json
@@ -84,6 +84,7 @@
     "@types/n3": "^1.4.4",
     "@types/node": "^14.10.2",
     "@types/pump": "^1.1.0",
+    "@types/punycode": "^2.1.0",
     "@types/rdf-js": "^4.0.0",
     "@types/sparqljs": "^3.1.0",
     "@types/streamify-array": "^1.0.0",
@@ -101,6 +102,7 @@
     "mime-types": "^2.1.27",
     "n3": "^1.8.0",
     "pump": "^3.0.0",
+    "punycode": "^2.1.1",
     "rdf-parse": "^1.7.0",
     "rdf-serialize": "^1.1.0",
     "rdf-terms": "^1.5.1",
diff --git a/src/index.ts b/src/index.ts
index a4284ce4c..d9fa01c92 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -169,6 +169,7 @@ export * from './storage/mapping/BaseFileIdentifierMapper';
 export * from './storage/mapping/ExtensionBasedMapper';
 export * from './storage/mapping/FileIdentifierMapper';
 export * from './storage/mapping/FixedContentTypeMapper';
+export * from './storage/mapping/SubdomainExtensionBasedMapper';
 
 // Storage/Patch
 export * from './storage/patch/PatchHandler';
diff --git a/src/storage/mapping/ExtensionBasedMapper.ts b/src/storage/mapping/ExtensionBasedMapper.ts
index 987750e0c..c01027491 100644
--- a/src/storage/mapping/ExtensionBasedMapper.ts
+++ b/src/storage/mapping/ExtensionBasedMapper.ts
@@ -53,17 +53,25 @@ export class ExtensionBasedMapper extends BaseFileIdentifierMapper {
   }
 
   protected async getDocumentUrl(relative: string): Promise<string> {
-    const extension = getExtension(relative);
-    if (extension && relative.endsWith(`$.${extension}`)) {
-      relative = relative.slice(0, -(extension.length + 2));
-    }
-    return super.getDocumentUrl(relative);
+    return super.getDocumentUrl(this.stripExtension(relative));
   }
 
   protected async getContentTypeFromPath(filePath: string): Promise<string> {
     return this.types[getExtension(filePath).toLowerCase()] ||
       super.getContentTypeFromPath(filePath);
   }
+
+  /**
+   * Helper function that removes the internal extension, one starting with $., from the given path.
+   * Nothing happens if no such extension is present.
+   */
+  protected stripExtension(path: string): string {
+    const extension = getExtension(path);
+    if (extension && path.endsWith(`$.${extension}`)) {
+      path = path.slice(0, -(extension.length + 2));
+    }
+    return path;
+  }
 }
 
 export class ExtensionBasedMapperFactory implements FileIdentifierMapperFactory<ExtensionBasedMapper> {
diff --git a/src/storage/mapping/SubdomainExtensionBasedMapper.ts b/src/storage/mapping/SubdomainExtensionBasedMapper.ts
new file mode 100644
index 000000000..8691ca396
--- /dev/null
+++ b/src/storage/mapping/SubdomainExtensionBasedMapper.ts
@@ -0,0 +1,93 @@
+import { toASCII, toUnicode } from 'punycode/';
+import type { ResourceIdentifier } from '../../ldp/representation/ResourceIdentifier';
+import { TEXT_TURTLE } from '../../util/ContentTypes';
+import { ForbiddenHttpError } from '../../util/errors/ForbiddenHttpError';
+import { InternalServerError } from '../../util/errors/InternalServerError';
+import { NotFoundHttpError } from '../../util/errors/NotFoundHttpError';
+import {
+  decodeUriPathComponents,
+  encodeUriPathComponents,
+  ensureTrailingSlash,
+  createSubdomainRegexp,
+  extractScheme,
+  trimTrailingSlashes,
+} from '../../util/PathUtil';
+import { ExtensionBasedMapper } from './ExtensionBasedMapper';
+
+/**
+ * Extends the functionality of an {@link ExtensionBasedMapper} to support identifiers containing subdomains.
+ * This is mostly only relevant in case you want to support multiple pods with subdomain identifiers
+ * in a single ResourceStore.
+ *
+ * When converting to/from file paths, the subdomain is interpreted as a folder in the rootFilePath.
+ * The rest of the path is then interpreted relative to that folder.
+ * E.g. `http://alice.test.com/foo` results in the relative path `/alice/foo`.
+ *
+ * In case there is no subdomain in the URL, the `baseSubdomain` parameter is used instead.
+ * E.g., if the `baseSubdomain` is "www", `http://test.com/foo` would result in the relative path `/www/foo`.
+ * This means that there is no identifier that maps to the `rootFilePath` itself.
+ * To prevent the possibility of 2 identifiers linking to the same file,
+ * identifiers containing the default subdomain are rejected.
+ * E.g., `http://www.test.com/foo` would result in a 403, even if `http://test.com/foo` exists.
+ */
+export class SubdomainExtensionBasedMapper extends ExtensionBasedMapper {
+  private readonly baseSubdomain: string;
+  private readonly regex: RegExp;
+  private readonly baseParts: { scheme: string; rest: string };
+
+  public constructor(base: string, rootFilepath: string, baseSubdomain = 'www',
+    overrideTypes = { acl: TEXT_TURTLE, meta: TEXT_TURTLE }) {
+    super(base, rootFilepath, overrideTypes);
+    this.baseSubdomain = baseSubdomain;
+    this.regex = createSubdomainRegexp(ensureTrailingSlash(base));
+    this.baseParts = extractScheme(ensureTrailingSlash(base));
+  }
+
+  protected async getContainerUrl(relative: string): Promise<string> {
+    return ensureTrailingSlash(this.relativeToUrl(relative));
+  }
+
+  protected async getDocumentUrl(relative: string): Promise<string> {
+    relative = this.stripExtension(relative);
+    return trimTrailingSlashes(this.relativeToUrl(relative));
+  }
+
+  /**
+   * Converts a relative path to a URL.
+   * Examples assuming http://test.com/ is the base url and `www` the base subdomain:
+   *  * /www/foo gives http://test.com/foo
+   *  * /alice/foo/ gives http://alice.test.com/foo/
+   */
+  protected relativeToUrl(relative: string): string {
+    const match = /^\/([^/]+)\/(.*)$/u.exec(relative);
+    if (!Array.isArray(match)) {
+      throw new InternalServerError(`Illegal relative path ${relative}`);
+    }
+    const tail = encodeUriPathComponents(match[2]);
+    if (match[1] === this.baseSubdomain) {
+      return `${this.baseRequestURI}/${tail}`;
+    }
+    return `${this.baseParts.scheme}${toASCII(match[1])}.${this.baseParts.rest}${tail}`;
+  }
+
+  /**
+   * Gets the relative path as though the subdomain url is the base, and then prepends it with the subdomain.
+   * Examples assuming http://test.com/ is the base url and `www` the base subdomain:
+   *  * http://test.com/foo gives /www/foo
+   *  * http://alice.test.com/foo/ gives /alice/foo/
+   */
+  protected getRelativePath(identifier: ResourceIdentifier): string {
+    const match = this.regex.exec(identifier.path);
+    if (!Array.isArray(match)) {
+      this.logger.warn(`The URL ${identifier.path} is outside of the scope ${this.baseRequestURI}`);
+      throw new NotFoundHttpError();
+    }
+    // Otherwise 2 different identifiers would be able to access the same resource
+    if (match[1] === this.baseSubdomain) {
+      throw new ForbiddenHttpError(`Subdomain ${this.baseSubdomain} can not be used.`);
+    }
+    const tail = `/${decodeUriPathComponents(identifier.path.slice(match[0].length))}`;
+    const subdomain = match[1] ? toUnicode(match[1]) : this.baseSubdomain;
+    return `/${subdomain}${tail}`;
+  }
+}
diff --git a/src/util/PathUtil.ts b/src/util/PathUtil.ts
index 22f25cb6e..60fb6bb00 100644
--- a/src/util/PathUtil.ts
+++ b/src/util/PathUtil.ts
@@ -138,3 +138,32 @@ export function isContainerPath(path: string): boolean {
 export function isContainerIdentifier(identifier: ResourceIdentifier): boolean {
   return isContainerPath(identifier.path);
 }
+
+/**
+ * Splits a URL (or similar) string into a part containing its scheme and one containing the rest.
+ * E.g., `http://test.com/` results in `{ scheme: 'http://', rest: 'test.com/' }`.
+ * @param url - String to parse.
+ */
+export function extractScheme(url: string): { scheme: string; rest: string} {
+  const match = /^([^:]+:\/\/)(.*)$/u.exec(url)!;
+  return { scheme: match[1], rest: match[2] };
+}
+
+/**
+ * Creates a regular expression that matches URLs containing the given baseUrl, or a subdomain of the given baseUrl.
+ * In case there is a subdomain, the first capture group of the regular expression will be that subdomain.
+ *
+ * Examples with baseUrl `http://test.com/foo/`:
+ *  - Will match `http://test.com/foo/`
+ *  - Will match `http://test.com/foo/bar/baz`
+ *  - Will match `http://alice.bob.test.com/foo/bar/baz`, first capture group will be `alice.bob`
+ *  - Will not match `http://test.com/`
+ *  - Will not match `http://alicetest.com/foo/`
+ * @param baseUrl - Base URL for the regular expression.
+ */
+export function createSubdomainRegexp(baseUrl: string): RegExp {
+  const { scheme, rest } = extractScheme(baseUrl);
+  // Escape regex syntax characters so the base URL only matches literally (e.g. `.` must not match any character)
+  const escape = (text: string): string => text.replace(/[$()*+.?[\\\]^{|}]/gu, '\\$&');
+  return new RegExp(`^${escape(scheme)}(?:([^/]+)\\.)?${escape(rest)}`, 'u');
+}
diff --git a/test/unit/storage/mapping/SubdomainExtensionBasedMapper.test.ts b/test/unit/storage/mapping/SubdomainExtensionBasedMapper.test.ts
new file mode 100644
index 000000000..3c83d9eb3
--- /dev/null
+++ b/test/unit/storage/mapping/SubdomainExtensionBasedMapper.test.ts
@@ -0,0 +1,90 @@
+import { SubdomainExtensionBasedMapper } from '../../../../src/storage/mapping/SubdomainExtensionBasedMapper';
+import { ForbiddenHttpError } from '../../../../src/util/errors/ForbiddenHttpError';
+import { InternalServerError } from '../../../../src/util/errors/InternalServerError';
+import { NotFoundHttpError } from '../../../../src/util/errors/NotFoundHttpError';
+
+function getSubdomain(subdomain: string): string {
+  return `http://${subdomain}.test.com/`;
+}
+
+describe('A SubdomainExtensionBasedMapper', (): void => {
+  const base = 'http://test.com/';
+  const rootFilepath = 'uploads/';
+  const mapper = new SubdomainExtensionBasedMapper(base, rootFilepath);
+
+  describe('mapUrlToFilePath', (): void => {
+    it('converts file paths to identifiers with a subdomain.', async(): Promise<void> => {
+      const identifier = { path: `${getSubdomain('alice')}test.txt` };
+      await expect(mapper.mapUrlToFilePath(identifier, 'text/plain')).resolves.toEqual({
+        identifier,
+        filePath: `${rootFilepath}alice/test.txt`,
+        contentType: 'text/plain',
+      });
+    });
+
+    it('adds the default subdomain to the file path for root identifiers.', async(): Promise<void> => {
+      const identifier = { path: `${base}test.txt` };
+      await expect(mapper.mapUrlToFilePath(identifier, 'text/plain')).resolves.toEqual({
+        identifier,
+        filePath: `${rootFilepath}www/test.txt`,
+        contentType: 'text/plain',
+      });
+    });
+
+    it('decodes punycode when generating a file path.', async(): Promise<void> => {
+      const identifier = { path: `${getSubdomain('xn--c1yn36f')}t%20est.txt` };
+      await expect(mapper.mapUrlToFilePath(identifier, 'text/plain')).resolves.toEqual({
+        identifier,
+        filePath: `${rootFilepath}點看/t est.txt`,
+        contentType: 'text/plain',
+      });
+    });
+
+    it('errors if the path is invalid.', async(): Promise<void> => {
+      const identifier = { path: `veryinvalidpath` };
+      await expect(mapper.mapUrlToFilePath(identifier, 'text/plain')).rejects.toThrow(NotFoundHttpError);
+    });
+
+    it('errors if the subdomain matches the default one.', async(): Promise<void> => {
+      const identifier = { path: `${getSubdomain('www')}test.txt` };
+      await expect(mapper.mapUrlToFilePath(identifier, 'text/plain')).rejects.toThrow(ForbiddenHttpError);
+    });
+  });
+
+  describe('mapFilePathToUrl', (): void => {
+    it('uses the first folder in a relative path as subdomain for identifiers.', async(): Promise<void> => {
+      await expect(mapper.mapFilePathToUrl(`${rootFilepath}alice/test.txt`, false)).resolves.toEqual({
+        identifier: { path: `${getSubdomain('alice')}test.txt` },
+        filePath: `${rootFilepath}alice/test.txt`,
+        contentType: 'text/plain',
+      });
+    });
+
+    it('correctly generates container identifiers.', async(): Promise<void> => {
+      await expect(mapper.mapFilePathToUrl(`${rootFilepath}alice/test.txt`, true)).resolves.toEqual({
+        identifier: { path: `${getSubdomain('alice')}test.txt/` },
+        filePath: `${rootFilepath}alice/test.txt`,
+      });
+    });
+
+    it('hides the subdomain if it matches the default one.', async(): Promise<void> => {
+      await expect(mapper.mapFilePathToUrl(`${rootFilepath}www/test.txt`, false)).resolves.toEqual({
+        identifier: { path: `${base}test.txt` },
+        filePath: `${rootFilepath}www/test.txt`,
+        contentType: 'text/plain',
+      });
+    });
+
+    it('encodes using punycode when generating the subdomain.', async(): Promise<void> => {
+      await expect(mapper.mapFilePathToUrl(`${rootFilepath}點看/t est.txt`, false)).resolves.toEqual({
+        identifier: { path: `${getSubdomain('xn--c1yn36f')}t%20est.txt` },
+        filePath: `${rootFilepath}點看/t est.txt`,
+        contentType: 'text/plain',
+      });
+    });
+
+    it('cannot convert the root filepath to an identifier.', async(): Promise<void> => {
+      await expect(mapper.mapFilePathToUrl(rootFilepath, true)).rejects.toThrow(InternalServerError);
+    });
+  });
+});