feat: Parse content-type more strictly

This commit is contained in:
Joachim Van Herwegen 2022-03-28 10:10:56 +02:00
parent 7152897b89
commit 027e3707fd
8 changed files with 82 additions and 15 deletions

View File

@ -5,11 +5,13 @@ import type { Quad } from 'rdf-js';
import type { Representation } from '../../http/representation/Representation';
import { RepresentationMetadata } from '../../http/representation/RepresentationMetadata';
import type { ResourceIdentifier } from '../../http/representation/ResourceIdentifier';
import { getLoggerFor } from '../../logging/LogUtil';
import { NotFoundHttpError } from '../../util/errors/NotFoundHttpError';
import { isSystemError } from '../../util/errors/SystemError';
import { UnsupportedMediaTypeHttpError } from '../../util/errors/UnsupportedMediaTypeHttpError';
import { guardStream } from '../../util/GuardedStream';
import type { Guarded } from '../../util/GuardedStream';
import { parseContentType } from '../../util/HeaderUtil';
import { joinFilePath, isContainerIdentifier } from '../../util/PathUtil';
import { parseQuads, serializeQuads } from '../../util/QuadUtil';
import { addResourceMetadata, updateModifiedDate } from '../../util/ResourceUtil';
@ -22,6 +24,8 @@ import type { DataAccessor } from './DataAccessor';
* DataAccessor that uses the file system to store documents as files and containers as folders.
*/
export class FileDataAccessor implements DataAccessor {
protected readonly logger = getLoggerFor(this);
protected readonly resourceMapper: FileIdentifierMapper;
public constructor(resourceMapper: FileIdentifierMapper) {
@ -303,8 +307,15 @@ export class FileDataAccessor implements DataAccessor {
addResourceMetadata(metadata, childStats.isDirectory());
this.addPosixMetadata(metadata, childStats);
// Containers will not have a content-type
if (childLink.contentType) {
metadata.add(RDF.terms.type, toNamedTerm(`${IANA.namespace}${childLink.contentType}#Resource`));
const { contentType, identifier } = childLink;
if (contentType) {
// Make sure we don't generate invalid URIs
try {
const { value } = parseContentType(contentType);
metadata.add(RDF.terms.type, toNamedTerm(`${IANA.namespace}${value}#Resource`));
} catch {
this.logger.warn(`Detected an invalid content-type "${contentType}" for ${identifier.path}`);
}
}
yield metadata;

View File

@ -109,7 +109,9 @@ export interface ContentType {
}
// REUSED REGEXES
const token = /^[a-zA-Z0-9!#$%&'*+-.^_`|~]+$/u;
// A single `tchar` as defined by RFC 7230, section 3.2.6.
// The hyphen is escaped on purpose: an unescaped `+-.` inside a character class
// is the range U+002B-U+002E, which would also accept a comma.
const tchar = /[a-zA-Z0-9!#$%&'*+\-.^_`|~]/u;
// A complete string consisting of one or more `tchar`s.
const token = new RegExp(`^${tchar.source}+$`, 'u');
// A complete `token "/" token` string.
// Anchored on both sides so input with invalid leading/trailing characters is rejected
// instead of matching on a valid substring.
const mediaRange = new RegExp(`^${tchar.source}+/${tchar.source}+$`, 'u');
// HELPER FUNCTIONS
/**
@ -218,8 +220,7 @@ function parseAcceptPart(part: string, replacements: Record<string, string>): Ac
const [ range, ...parameters ] = part.split(';').map((param): string => param.trim());
// No reason to test differently for * since we don't check if the type exists
const [ type, subtype ] = range.split('/');
if (!type || !subtype || !token.test(type) || !token.test(subtype)) {
if (!mediaRange.test(range)) {
logger.warn(`Invalid Accept range: ${range}`);
throw new BadRequestHttpError(
`Invalid Accept range: ${range} does not match ( "*/*" / ( token "/" "*" ) / ( token "/" token ) )`,
@ -438,6 +439,11 @@ export function parseContentType(input: string): ContentType {
// Quoted strings could prevent split from having correct results
const { result, replacements } = transformQuotedStrings(input);
const [ value, ...params ] = result.split(';').map((str): string => str.trim());
if (!mediaRange.test(value)) {
logger.warn(`Invalid content-type: ${value}`);
throw new BadRequestHttpError(`Invalid content-type: ${value} does not match ( token "/" token )`);
}
return parseParameters(params, replacements)
.reduce<ContentType>(
(prev, cur): ContentType => {

View File

@ -26,8 +26,8 @@ describe('An RdfValidator', (): void => {
});
it('validates data by running it through a converter.', async(): Promise<void> => {
converter.handleSafe = jest.fn().mockResolvedValue(new BasicRepresentation('transformedData', 'wrongType'));
const representation = new BasicRepresentation('data', 'content-type');
converter.handleSafe = jest.fn().mockResolvedValue(new BasicRepresentation('transformedData', 'wrong/type'));
const representation = new BasicRepresentation('data', 'content/type');
const quads = representation.metadata.quads();
// Output is not important for this Validator
await expect(validator.handle({ representation, identifier })).resolves.toBeDefined();
@ -39,7 +39,7 @@ describe('An RdfValidator', (): void => {
it('throws an error when validating invalid data.', async(): Promise<void> => {
converter.handleSafe = jest.fn().mockRejectedValue(new Error('bad data!'));
const representation = new BasicRepresentation('data', 'content-type');
const representation = new BasicRepresentation('data', 'content/type');
await expect(validator.handle({ representation, identifier })).rejects.toThrow('bad data!');
// Make sure the data on the readable has not been reset
expect(representation.data.destroyed).toBe(true);

View File

@ -16,7 +16,7 @@ describe('A BasicRequestParser', (): void => {
let requestParser: BasicRequestParser;
beforeEach(async(): Promise<void> => {
targetExtractor = new StaticAsyncHandler(true, 'target' as any);
targetExtractor = new StaticAsyncHandler(true, { path: 'target' });
preferenceParser = new StaticAsyncHandler(true, 'preference' as any);
metadataParser = new StaticAsyncHandler(true, undefined);
conditionsParser = new StaticAsyncHandler(true, 'conditions' as any);
@ -39,10 +39,10 @@ describe('A BasicRequestParser', (): void => {
bodyParser.handle = ({ metadata }): any => ({ data: 'body', metadata });
await expect(requestParser.handle({ url: 'url', method: 'GET' } as any)).resolves.toEqual({
method: 'GET',
target: 'target',
target: { path: 'target' },
preferences: 'preference',
conditions: 'conditions',
body: { data: 'body', metadata: new RepresentationMetadata('target') },
body: { data: 'body', metadata: new RepresentationMetadata({ path: 'target' }) },
});
});
});

View File

@ -5,6 +5,7 @@ import type { Representation } from '../../../../src/http/representation/Represe
import { RepresentationMetadata } from '../../../../src/http/representation/RepresentationMetadata';
import { FileDataAccessor } from '../../../../src/storage/accessors/FileDataAccessor';
import { ExtensionBasedMapper } from '../../../../src/storage/mapping/ExtensionBasedMapper';
import type { FileIdentifierMapper, ResourceLink } from '../../../../src/storage/mapping/FileIdentifierMapper';
import { APPLICATION_OCTET_STREAM } from '../../../../src/util/ContentTypes';
import { ConflictHttpError } from '../../../../src/util/errors/ConflictHttpError';
import { NotFoundHttpError } from '../../../../src/util/errors/NotFoundHttpError';
@ -26,6 +27,7 @@ now.setMilliseconds(0);
describe('A FileDataAccessor', (): void => {
const base = 'http://test.com/';
let mapper: FileIdentifierMapper;
let accessor: FileDataAccessor;
let cache: { data: any };
let metadata: RepresentationMetadata;
@ -33,7 +35,8 @@ describe('A FileDataAccessor', (): void => {
beforeEach(async(): Promise<void> => {
cache = mockFs(rootFilePath, now);
accessor = new FileDataAccessor(new ExtensionBasedMapper(base, rootFilePath));
mapper = new ExtensionBasedMapper(base, rootFilePath);
accessor = new FileDataAccessor(mapper);
metadata = new RepresentationMetadata(APPLICATION_OCTET_STREAM);
@ -192,6 +195,48 @@ describe('A FileDataAccessor', (): void => {
}
});
it('does not generate IANA URIs for children with invalid content-types.', async(): Promise<void> => {
  cache.data = {
    container: {
      resource1: 'data',
      resource2: 'badData',
    },
  };

  // Delegates to the real mapper, but reports a malformed content-type for `resource2`
  const badMapper: jest.Mocked<FileIdentifierMapper> = {
    mapFilePathToUrl: jest.fn(async(filePath: string, isContainer: boolean): Promise<ResourceLink> => {
      const result = await mapper.mapFilePathToUrl(filePath, isContainer);
      if (filePath.endsWith('resource2')) {
        result.contentType = 'this is not a valid type';
      }
      return result;
    }),
    mapUrlToFilePath: jest.fn((...args): Promise<ResourceLink> => mapper.mapUrlToFilePath(...args)),
  };
  accessor = new FileDataAccessor(badMapper);

  const children = [];
  for await (const child of accessor.getChildren({ path: `${base}container/` })) {
    children.push(child);
  }

  // Identifiers
  const resource1 = `${base}container/resource1`;
  const resource2 = `${base}container/resource2`;
  expect(children).toHaveLength(2);
  expect(new Set(children.map((child): string => child.identifier.value))).toEqual(new Set([
    resource1,
    resource2,
  ]));

  // Look the RDF types up by identifier so the assertions below do not depend on
  // the order in which the children are yielded
  const typesByChild: Record<string, string[]> = {};
  for (const child of children) {
    typesByChild[child.identifier.value] = child.getAll(RDF.type).map((term): string => term.value);
  }

  // The valid child still gets its IANA media type
  expect(typesByChild[resource1]).toContain('http://www.w3.org/ns/iana/media-types/application/octet-stream#Resource');
  // The invalid child must not have any type in the IANA media type namespace
  for (const type of typesByChild[resource2]) {
    expect(type).not.toMatch(/^http:\/\/www\.w3\.org\/ns\/iana\/media-types\//u);
  }
});
it('adds stored metadata when requesting metadata.', async(): Promise<void> => {
cache.data = { resource: 'data', 'resource.meta': '<http://this> <http://is> <http://metadata>.' };
metadata = await accessor.getMetadata({ path: `${base}resource` });

View File

@ -72,7 +72,7 @@ describe('A SparqlDataAccessor', (): void => {
it('can only handle quad data.', async(): Promise<void> => {
let representation = new BasicRepresentation(data, metadata, true);
await expect(accessor.canHandle(representation)).rejects.toThrow(UnsupportedMediaTypeHttpError);
representation = new BasicRepresentation(data, 'newInternalType', false);
representation = new BasicRepresentation(data, 'internal/newInternalType', false);
await expect(accessor.canHandle(representation)).rejects.toThrow(UnsupportedMediaTypeHttpError);
representation = new BasicRepresentation(data, INTERNAL_QUADS, false);
metadata.contentType = INTERNAL_QUADS;

View File

@ -1,4 +1,5 @@
import type { HttpResponse } from '../../../src/server/HttpResponse';
import { BadRequestHttpError } from '../../../src/util/errors/BadRequestHttpError';
import {
addHeader,
parseAccept,
@ -213,6 +214,10 @@ describe('HeaderUtil', (): void => {
contentTypePlain.parameters.test = 'value1';
expect(parseContentType('text/plain; charset=utf-8;test="value1"')).toEqual(contentTypePlain);
});
it('errors on invalid content-types.', (): void => {
  // A value without a `token "/" token` shape has to be rejected
  const parseInvalid = (): unknown => parseContentType('invalid type');
  expect(parseInvalid).toThrow(BadRequestHttpError);
});
});
describe('#parseForwarded', (): void => {

View File

@ -10,7 +10,7 @@ describe('ResourceUtil', (): void => {
let representation: Representation;
beforeEach(async(): Promise<void> => {
representation = new BasicRepresentation('data', 'metadata');
representation = new BasicRepresentation('data', 'meta/data');
});
describe('#updateModifiedDate', (): void => {
@ -55,7 +55,7 @@ describe('ResourceUtil', (): void => {
it('ensures that original representation does not update when the clone is updated.', async(): Promise<void> => {
const res = await cloneRepresentation(representation);
res.metadata.contentType = 'typetype';
res.metadata.contentType = 'type/type';
expect(representation.metadata.contentType).not.toBe(res.metadata.contentType);
});
});