feat: Parse content-type more strictly

This commit is contained in:
Joachim Van Herwegen
2022-03-28 10:10:56 +02:00
parent 7152897b89
commit 027e3707fd
8 changed files with 82 additions and 15 deletions

View File

@@ -5,11 +5,13 @@ import type { Quad } from 'rdf-js';
import type { Representation } from '../../http/representation/Representation';
import { RepresentationMetadata } from '../../http/representation/RepresentationMetadata';
import type { ResourceIdentifier } from '../../http/representation/ResourceIdentifier';
import { getLoggerFor } from '../../logging/LogUtil';
import { NotFoundHttpError } from '../../util/errors/NotFoundHttpError';
import { isSystemError } from '../../util/errors/SystemError';
import { UnsupportedMediaTypeHttpError } from '../../util/errors/UnsupportedMediaTypeHttpError';
import { guardStream } from '../../util/GuardedStream';
import type { Guarded } from '../../util/GuardedStream';
import { parseContentType } from '../../util/HeaderUtil';
import { joinFilePath, isContainerIdentifier } from '../../util/PathUtil';
import { parseQuads, serializeQuads } from '../../util/QuadUtil';
import { addResourceMetadata, updateModifiedDate } from '../../util/ResourceUtil';
@@ -22,6 +24,8 @@ import type { DataAccessor } from './DataAccessor';
* DataAccessor that uses the file system to store documents as files and containers as folders.
*/
export class FileDataAccessor implements DataAccessor {
protected readonly logger = getLoggerFor(this);
protected readonly resourceMapper: FileIdentifierMapper;
public constructor(resourceMapper: FileIdentifierMapper) {
@@ -303,8 +307,15 @@ export class FileDataAccessor implements DataAccessor {
addResourceMetadata(metadata, childStats.isDirectory());
this.addPosixMetadata(metadata, childStats);
// Containers will not have a content-type
if (childLink.contentType) {
metadata.add(RDF.terms.type, toNamedTerm(`${IANA.namespace}${childLink.contentType}#Resource`));
const { contentType, identifier } = childLink;
if (contentType) {
// Make sure we don't generate invalid URIs
try {
const { value } = parseContentType(contentType);
metadata.add(RDF.terms.type, toNamedTerm(`${IANA.namespace}${value}#Resource`));
} catch {
this.logger.warn(`Detected an invalid content-type "${contentType}" for ${identifier.path}`);
}
}
yield metadata;

View File

@@ -109,7 +109,9 @@ export interface ContentType {
}
// REUSED REGEXES
const token = /^[a-zA-Z0-9!#$%&'*+-.^_`|~]+$/u;
// Single `tchar` as defined by the HTTP token grammar (RFC 7230, section 3.2.6).
// The hyphen is escaped on purpose: an unescaped `+-.` inside a character class is a
// range from U+002B to U+002E and would silently accept `,` as well.
const tchar = /[a-zA-Z0-9!#$%&'*+\-.^_`|~]/u;
// Matches a complete string made up of one or more `tchar`s.
const token = new RegExp(`^${tchar.source}+$`, 'u');
// Matches a complete `token "/" token` string. Anchored at both ends so that inputs which
// merely contain such a substring (e.g. `<text/plain>` or `a/b/c`) are rejected.
// `*` is itself a `tchar`, so `*/*` and `type/*` ranges still match.
const mediaRange = new RegExp(`^${tchar.source}+/${tchar.source}+$`, 'u');
// HELPER FUNCTIONS
/**
@@ -218,8 +220,7 @@ function parseAcceptPart(part: string, replacements: Record<string, string>): Ac
const [ range, ...parameters ] = part.split(';').map((param): string => param.trim());
// No reason to test differently for * since we don't check if the type exists
const [ type, subtype ] = range.split('/');
if (!type || !subtype || !token.test(type) || !token.test(subtype)) {
if (!mediaRange.test(range)) {
logger.warn(`Invalid Accept range: ${range}`);
throw new BadRequestHttpError(
`Invalid Accept range: ${range} does not match ( "*/*" / ( token "/" "*" ) / ( token "/" token ) )`,
@@ -438,6 +439,11 @@ export function parseContentType(input: string): ContentType {
// Quoted strings could prevent split from having correct results
const { result, replacements } = transformQuotedStrings(input);
const [ value, ...params ] = result.split(';').map((str): string => str.trim());
if (!mediaRange.test(value)) {
logger.warn(`Invalid content-type: ${value}`);
throw new BadRequestHttpError(`Invalid content-type: ${value} does not match ( token "/" token )`);
}
return parseParameters(params, replacements)
.reduce<ContentType>(
(prev, cur): ContentType => {