feat: Create SubdomainExtensionBasedMapper

This is required for file backends when supporting identifiers containing subdomains.
This commit is contained in:
Joachim Van Herwegen
2021-02-12 11:45:14 +01:00
parent e9502e55a7
commit bdb3621ee3
7 changed files with 232 additions and 7 deletions

View File

@@ -169,6 +169,7 @@ export * from './storage/mapping/BaseFileIdentifierMapper';
export * from './storage/mapping/ExtensionBasedMapper';
export * from './storage/mapping/FileIdentifierMapper';
export * from './storage/mapping/FixedContentTypeMapper';
export * from './storage/mapping/SubdomainExtensionBasedMapper';
// Storage/Patch
export * from './storage/patch/PatchHandler';

View File

@@ -53,17 +53,25 @@ export class ExtensionBasedMapper extends BaseFileIdentifierMapper {
}
/**
 * Determines the URL of a document from its relative path.
 * The internal extension (the one starting with `$.`) is removed through {@link stripExtension}
 * before the path is handed to the parent implementation.
 * @param relative - Relative path of the document.
 * @returns The URL the parent implementation generates for the stripped path.
 */
protected async getDocumentUrl(relative: string): Promise<string> {
  // The stripping logic lives in `stripExtension` so subclasses can reuse it;
  // there is no need to repeat it inline here.
  return super.getDocumentUrl(this.stripExtension(relative));
}
/**
 * Determines the content type of a file based on its extension.
 * The lower-cased extension is looked up in `this.types`;
 * when that lookup yields nothing usable, the parent implementation decides.
 * @param filePath - Path of the file to inspect.
 */
protected async getContentTypeFromPath(filePath: string): Promise<string> {
  const extension = getExtension(filePath).toLowerCase();
  const knownType = this.types[extension];
  if (knownType) {
    return knownType;
  }
  return super.getContentTypeFromPath(filePath);
}
/**
 * Removes the internal extension from a path, i.e. a trailing `$.` followed by the path's extension.
 * Paths that do not end in such an extension are returned unchanged.
 * @param path - Path to remove the internal extension from.
 * @returns The path without its internal extension.
 */
protected stripExtension(path: string): string {
  const extension = getExtension(path);
  // The suffix is 2 characters (`$.`) longer than the extension itself
  const internalSuffix = `$.${extension}`;
  const hasInternalSuffix = Boolean(extension) && path.endsWith(internalSuffix);
  return hasInternalSuffix ? path.slice(0, -internalSuffix.length) : path;
}
}
export class ExtensionBasedMapperFactory implements FileIdentifierMapperFactory<ExtensionBasedMapper> {

View File

@@ -0,0 +1,93 @@
import { toASCII, toUnicode } from 'punycode/';
import type { ResourceIdentifier } from '../../ldp/representation/ResourceIdentifier';
import { TEXT_TURTLE } from '../../util/ContentTypes';
import { ForbiddenHttpError } from '../../util/errors/ForbiddenHttpError';
import { InternalServerError } from '../../util/errors/InternalServerError';
import { NotFoundHttpError } from '../../util/errors/NotFoundHttpError';
import {
decodeUriPathComponents,
encodeUriPathComponents,
ensureTrailingSlash,
createSubdomainRegexp,
extractScheme,
trimTrailingSlashes,
} from '../../util/PathUtil';
import { ExtensionBasedMapper } from './ExtensionBasedMapper';
/**
 * An {@link ExtensionBasedMapper} that additionally supports identifiers containing subdomains.
 * This is mostly relevant when multiple pods with subdomain identifiers
 * have to be stored in a single ResourceStore.
 *
 * During conversion to/from file paths, the subdomain corresponds to a folder in the rootFilePath
 * and the remainder of the path is resolved relative to that folder.
 * E.g. `http://alice.test.com/foo` results in the relative path `/alice/foo`.
 *
 * URLs without a subdomain use the `baseSubdomain` parameter as folder name instead.
 * E.g., if the `baseSubdomain` is "www", `http://test.com/foo` would result in the relative path `/www/foo`.
 * A consequence is that no identifier maps to the `rootFilePath` itself.
 * Identifiers that explicitly contain the default subdomain are rejected,
 * since otherwise 2 identifiers could link to the same file.
 * E.g., `http://www.test.com/foo` would result in a 403, even if `http://test.com/foo` exists.
 */
export class SubdomainExtensionBasedMapper extends ExtensionBasedMapper {
  private readonly baseSubdomain: string;
  private readonly regex: RegExp;
  private readonly baseParts: { scheme: string; rest: string };

  /**
   * @param base - Base URL of the server.
   * @param rootFilepath - Root folder in which the subdomain folders are located.
   * @param baseSubdomain - Folder name used for URLs that have no subdomain.
   * @param overrideTypes - Extension to content type mappings passed on to the parent class.
   */
  public constructor(base: string, rootFilepath: string, baseSubdomain = 'www',
    overrideTypes = { acl: TEXT_TURTLE, meta: TEXT_TURTLE }) {
    super(base, rootFilepath, overrideTypes);
    // Both derived values are based on the base URL with a guaranteed trailing slash
    const slashedBase = ensureTrailingSlash(base);
    this.baseSubdomain = baseSubdomain;
    this.regex = createSubdomainRegexp(slashedBase);
    this.baseParts = extractScheme(slashedBase);
  }

  /**
   * Generates the URL of a container: the converted relative path with a trailing slash.
   */
  protected async getContainerUrl(relative: string): Promise<string> {
    const url = this.relativeToUrl(relative);
    return ensureTrailingSlash(url);
  }

  /**
   * Generates the URL of a document: the internal extension is removed from the relative path
   * and the resulting URL has no trailing slashes.
   */
  protected async getDocumentUrl(relative: string): Promise<string> {
    const url = this.relativeToUrl(this.stripExtension(relative));
    return trimTrailingSlashes(url);
  }

  /**
   * Converts a relative path to a URL.
   * Examples assuming http://test.com/ is the base url and `www` the base subdomain:
   *  * /www/foo gives http://test.com/foo
   *  * /alice/foo/ gives http://alice.test.com/foo/
   *
   * @throws InternalServerError if the path does not start with a folder followed by a slash.
   */
  protected relativeToUrl(relative: string): string {
    // First group is the top-level folder (the subdomain), second group is everything after it
    const segments = /^\/([^/]+)\/(.*)$/u.exec(relative);
    if (segments === null) {
      throw new InternalServerError(`Illegal relative path ${relative}`);
    }
    const [ , folder, remainder ] = segments;
    const encodedTail = encodeUriPathComponents(remainder);
    return folder === this.baseSubdomain ?
      `${this.baseRequestURI}/${encodedTail}` :
      `${this.baseParts.scheme}${toASCII(folder)}.${this.baseParts.rest}${encodedTail}`;
  }

  /**
   * Determines the relative path as if the subdomain URL were the base, prefixed with the subdomain folder.
   * Examples assuming http://test.com/ is the base url and `www` the base subdomain:
   *  * http://test.com/foo gives /www/foo
   *  * http://alice.test.com/foo/ gives /alice/foo/
   *
   * @throws NotFoundHttpError if the identifier is not matched by the base URL expression.
   * @throws ForbiddenHttpError if the identifier explicitly uses the base subdomain.
   */
  protected getRelativePath(identifier: ResourceIdentifier): string {
    const { path } = identifier;
    const found = this.regex.exec(path);
    if (found === null) {
      this.logger.warn(`The URL ${path} is outside of the scope ${this.baseRequestURI}`);
      throw new NotFoundHttpError();
    }
    const [ matchedBase, rawSubdomain ] = found;
    // Allowing this would make the same resource reachable through 2 different identifiers
    if (rawSubdomain === this.baseSubdomain) {
      throw new ForbiddenHttpError(`Subdomain ${this.baseSubdomain} can not be used.`);
    }
    const decodedTail = decodeUriPathComponents(path.slice(matchedBase.length));
    // No subdomain in the URL means the base subdomain folder is used
    const folder = rawSubdomain ? toUnicode(rawSubdomain) : this.baseSubdomain;
    return `/${folder}/${decodedTail}`;
  }
}

View File

@@ -138,3 +138,30 @@ export function isContainerPath(path: string): boolean {
export function isContainerIdentifier(identifier: ResourceIdentifier): boolean {
  // Only the path of the identifier is relevant; the actual check is delegated to `isContainerPath`.
  const { path } = identifier;
  return isContainerPath(path);
}
/**
 * Splits a URL (or similar) string into a part containing its scheme and one containing the rest.
 * E.g., `http://test.com/` results in `{ scheme: 'http://', rest: 'test.com/' }`.
 * @param url - String to parse.
 * @returns The scheme, including the `://` separator, and everything that follows it.
 * @throws Error if the input does not start with a `scheme://` prefix.
 */
export function extractScheme(url: string): { scheme: string; rest: string} {
  const match = /^([^:]+:\/\/)(.*)$/u.exec(url);
  // Fail with a clear message instead of a TypeError caused by reading from `null`
  if (match === null) {
    throw new Error(`Unable to extract a scheme from ${url}`);
  }
  return { scheme: match[1], rest: match[2] };
}

/**
 * Escapes all regular expression syntax characters in the input,
 * so it can be embedded in a pattern and only matches itself literally.
 * Only syntax characters are escaped, which keeps the result valid for expressions using the `u` flag.
 * @param text - String to escape.
 */
function escapeRegexpSyntax(text: string): string {
  return text.replace(/[.*+?^${}()|[\]\\]/gu, '\\$&');
}

/**
 * Creates a regular expression that matches URLs containing the given baseUrl, or a subdomain of the given baseUrl.
 * In case there is a subdomain, the first match of the regular expression will be that subdomain.
 * The base URL is matched literally: characters such as `.` have no special meaning.
 *
 * Examples with baseUrl `http://test.com/foo/`:
 *  - Will match `http://test.com/foo/`
 *  - Will match `http://test.com/foo/bar/baz`
 *  - Will match `http://alice.bob.test.com/foo/bar/baz`, first match result will be `alice.bob`
 *  - Will not match `http://test.com/`
 *  - Will not match `http://alicetest.com/foo/`
 *  - Will not match `http://testXcom/foo/`
 * @param baseUrl - Base URL for the regular expression.
 * @throws Error if the base URL has no `scheme://` prefix.
 */
export function createSubdomainRegexp(baseUrl: string): RegExp {
  const { scheme, rest } = extractScheme(baseUrl);
  // Without escaping, the dots of the host would match any character
  // and brackets (e.g., IPv6 hosts) would be interpreted as character classes.
  const schemePattern = escapeRegexpSyntax(scheme);
  const restPattern = escapeRegexpSyntax(rest);
  return new RegExp(`^${schemePattern}(?:([^/]+)\\.)?${restPattern}`, 'u');
}