diff --git a/src/storage/accessors/FileDataAccessor.ts b/src/storage/accessors/FileDataAccessor.ts index 884950273..a07e4c37a 100644 --- a/src/storage/accessors/FileDataAccessor.ts +++ b/src/storage/accessors/FileDataAccessor.ts @@ -5,11 +5,13 @@ import type { Quad } from 'rdf-js'; import type { Representation } from '../../http/representation/Representation'; import { RepresentationMetadata } from '../../http/representation/RepresentationMetadata'; import type { ResourceIdentifier } from '../../http/representation/ResourceIdentifier'; +import { getLoggerFor } from '../../logging/LogUtil'; import { NotFoundHttpError } from '../../util/errors/NotFoundHttpError'; import { isSystemError } from '../../util/errors/SystemError'; import { UnsupportedMediaTypeHttpError } from '../../util/errors/UnsupportedMediaTypeHttpError'; import { guardStream } from '../../util/GuardedStream'; import type { Guarded } from '../../util/GuardedStream'; +import { parseContentType } from '../../util/HeaderUtil'; import { joinFilePath, isContainerIdentifier } from '../../util/PathUtil'; import { parseQuads, serializeQuads } from '../../util/QuadUtil'; import { addResourceMetadata, updateModifiedDate } from '../../util/ResourceUtil'; @@ -22,6 +24,8 @@ import type { DataAccessor } from './DataAccessor'; * DataAccessor that uses the file system to store documents as files and containers as folders. */ export class FileDataAccessor implements DataAccessor { + protected readonly logger = getLoggerFor(this); + protected readonly resourceMapper: FileIdentifierMapper; public constructor(resourceMapper: FileIdentifierMapper) { @@ -303,8 +307,15 @@ export class FileDataAccessor implements DataAccessor { addResourceMetadata(metadata, childStats.isDirectory()); this.addPosixMetadata(metadata, childStats); // Containers will not have a content-type - if (childLink.contentType) { - metadata.add(RDF.terms.type, toNamedTerm(`${IANA.namespace}${childLink.contentType}#Resource`)); + const { contentType, identifier } = childLink; + if (contentType) { + // Make sure we don't generate invalid URIs + try { + const { value } = parseContentType(contentType); + metadata.add(RDF.terms.type, toNamedTerm(`${IANA.namespace}${value}#Resource`)); + } catch { + this.logger.warn(`Detected an invalid content-type "${contentType}" for ${identifier.path}`); + } } yield metadata; diff --git a/src/util/HeaderUtil.ts b/src/util/HeaderUtil.ts index 590bf004b..be6269350 100644 --- a/src/util/HeaderUtil.ts +++ b/src/util/HeaderUtil.ts @@ -109,7 +109,9 @@ export interface ContentType { } // REUSED REGEXES -const token = /^[a-zA-Z0-9!#$%&'*+-.^_`|~]+$/u; +const tchar = /[a-zA-Z0-9!#$%&'*+-.^_`|~]/u; +const token = new RegExp(`^${tchar.source}+$`, 'u'); +const mediaRange = new RegExp(`${tchar.source}+/${tchar.source}+`, 'u'); // HELPER FUNCTIONS /** @@ -218,8 +220,7 @@ function parseAcceptPart(part: string, replacements: Record): Ac const [ range, ...parameters ] = part.split(';').map((param): string => param.trim()); // No reason to test differently for * since we don't check if the type exists - const [ type, subtype ] = range.split('/'); - if (!type || !subtype || !token.test(type) || !token.test(subtype)) { + if (!mediaRange.test(range)) { logger.warn(`Invalid Accept range: ${range}`); throw new BadRequestHttpError( `Invalid Accept range: ${range} does not match ( "*/*" / ( token "/" "*" ) / ( token "/" token ) )`, @@ -438,6 +439,11 @@ export function parseContentType(input: string): ContentType { // Quoted strings could prevent split from having correct results const { result, replacements } = transformQuotedStrings(input); const [ value, ...params ] = result.split(';').map((str): string => str.trim()); + if (!mediaRange.test(value)) { + logger.warn(`Invalid content-type: ${value}`); + throw new BadRequestHttpError(`Invalid content-type: ${value} does not match ( token "/" token )`); + } + return parseParameters(params, replacements) .reduce( (prev, cur): ContentType => { diff --git a/test/unit/http/auxiliary/RdfValidator.test.ts b/test/unit/http/auxiliary/RdfValidator.test.ts index 16e3fe8f4..4d5062018 100644 --- a/test/unit/http/auxiliary/RdfValidator.test.ts +++ b/test/unit/http/auxiliary/RdfValidator.test.ts @@ -26,8 +26,8 @@ describe('An RdfValidator', (): void => { }); it('validates data by running it through a converter.', async(): Promise => { - converter.handleSafe = jest.fn().mockResolvedValue(new BasicRepresentation('transformedData', 'wrongType')); - const representation = new BasicRepresentation('data', 'content-type'); + converter.handleSafe = jest.fn().mockResolvedValue(new BasicRepresentation('transformedData', 'wrong/type')); + const representation = new BasicRepresentation('data', 'content/type'); const quads = representation.metadata.quads(); // Output is not important for this Validator await expect(validator.handle({ representation, identifier })).resolves.toBeDefined(); @@ -39,7 +39,7 @@ describe('An RdfValidator', (): void => { it('throws an error when validating invalid data.', async(): Promise => { converter.handleSafe = jest.fn().mockRejectedValue(new Error('bad data!')); - const representation = new BasicRepresentation('data', 'content-type'); + const representation = new BasicRepresentation('data', 'content/type'); await expect(validator.handle({ representation, identifier })).rejects.toThrow('bad data!'); // Make sure the data on the readable has not been reset expect(representation.data.destroyed).toBe(true); diff --git a/test/unit/http/input/BasicRequestParser.test.ts b/test/unit/http/input/BasicRequestParser.test.ts index 2d385a1f5..5dc28a94d 100644 --- a/test/unit/http/input/BasicRequestParser.test.ts +++ b/test/unit/http/input/BasicRequestParser.test.ts @@ -16,7 +16,7 @@ describe('A BasicRequestParser', (): void => { let requestParser: BasicRequestParser; beforeEach(async(): Promise => { - targetExtractor = new StaticAsyncHandler(true, 'target' as any); + targetExtractor = new StaticAsyncHandler(true, { path: 'target' }); preferenceParser = new StaticAsyncHandler(true, 'preference' as any); metadataParser = new StaticAsyncHandler(true, undefined); conditionsParser = new StaticAsyncHandler(true, 'conditions' as any); @@ -39,10 +39,10 @@ describe('A BasicRequestParser', (): void => { bodyParser.handle = ({ metadata }): any => ({ data: 'body', metadata }); await expect(requestParser.handle({ url: 'url', method: 'GET' } as any)).resolves.toEqual({ method: 'GET', - target: 'target', + target: { path: 'target' }, preferences: 'preference', conditions: 'conditions', - body: { data: 'body', metadata: new RepresentationMetadata('target') }, + body: { data: 'body', metadata: new RepresentationMetadata({ path: 'target' }) }, }); }); }); diff --git a/test/unit/storage/accessors/FileDataAccessor.test.ts b/test/unit/storage/accessors/FileDataAccessor.test.ts index a978d84cb..51d3ce687 100644 --- a/test/unit/storage/accessors/FileDataAccessor.test.ts +++ b/test/unit/storage/accessors/FileDataAccessor.test.ts @@ -5,6 +5,7 @@ import type { Representation } from '../../../../src/http/representation/Represe import { RepresentationMetadata } from '../../../../src/http/representation/RepresentationMetadata'; import { FileDataAccessor } from '../../../../src/storage/accessors/FileDataAccessor'; import { ExtensionBasedMapper } from '../../../../src/storage/mapping/ExtensionBasedMapper'; +import type { FileIdentifierMapper, ResourceLink } from '../../../../src/storage/mapping/FileIdentifierMapper'; import { APPLICATION_OCTET_STREAM } from '../../../../src/util/ContentTypes'; import { ConflictHttpError } from '../../../../src/util/errors/ConflictHttpError'; import { NotFoundHttpError } from '../../../../src/util/errors/NotFoundHttpError'; @@ -26,6 +27,7 @@ now.setMilliseconds(0); describe('A FileDataAccessor', (): void => { const base = 'http://test.com/'; + let mapper: FileIdentifierMapper; let accessor: FileDataAccessor; let cache: { data: any }; let metadata: RepresentationMetadata; @@ -33,7 +35,8 @@ describe('A FileDataAccessor', (): void => { beforeEach(async(): Promise => { cache = mockFs(rootFilePath, now); - accessor = new FileDataAccessor(new ExtensionBasedMapper(base, rootFilePath)); + mapper = new ExtensionBasedMapper(base, rootFilePath); + accessor = new FileDataAccessor(mapper); metadata = new RepresentationMetadata(APPLICATION_OCTET_STREAM); @@ -192,6 +195,48 @@ describe('A FileDataAccessor', (): void => { } }); + it('does not generate IANA URIs for children with invalid content-types.', async(): Promise => { + cache.data = { + container: { + resource1: 'data', + resource2: 'badData', + }, + }; + + const badMapper: jest.Mocked = { + mapFilePathToUrl: jest.fn(async(filePath: string, isContainer: boolean): Promise => { + const result = await mapper.mapFilePathToUrl(filePath, isContainer); + if (filePath.endsWith('resource2')) { + result.contentType = 'this is not a valid type'; + } + return result; + }), + mapUrlToFilePath: jest.fn((...args): Promise => mapper.mapUrlToFilePath(...args)), + }; + + accessor = new FileDataAccessor(badMapper); + + const children = []; + for await (const child of accessor.getChildren({ path: `${base}container/` })) { + children.push(child); + } + + // Identifiers + expect(children).toHaveLength(2); + expect(new Set(children.map((child): string => child.identifier.value))).toEqual(new Set([ + `${base}container/resource1`, + `${base}container/resource2`, + ])); + + const types1 = children[0].getAll(RDF.type).map((term): string => term.value); + const types2 = children[1].getAll(RDF.type).map((term): string => term.value); + + expect(types1).toContain('http://www.w3.org/ns/iana/media-types/application/octet-stream#Resource'); + for (const type of types2) { + expect(type).not.toMatch(/^http:\/\/www\.w3.org\/ns\/iana\/media-types\//u); + } + }); + it('adds stored metadata when requesting metadata.', async(): Promise => { cache.data = { resource: 'data', 'resource.meta': ' .' }; metadata = await accessor.getMetadata({ path: `${base}resource` }); diff --git a/test/unit/storage/accessors/SparqlDataAccessor.test.ts b/test/unit/storage/accessors/SparqlDataAccessor.test.ts index cbd7e4f81..0a7cd47cb 100644 --- a/test/unit/storage/accessors/SparqlDataAccessor.test.ts +++ b/test/unit/storage/accessors/SparqlDataAccessor.test.ts @@ -72,7 +72,7 @@ describe('A SparqlDataAccessor', (): void => { it('can only handle quad data.', async(): Promise => { let representation = new BasicRepresentation(data, metadata, true); await expect(accessor.canHandle(representation)).rejects.toThrow(UnsupportedMediaTypeHttpError); - representation = new BasicRepresentation(data, 'newInternalType', false); + representation = new BasicRepresentation(data, 'internal/newInternalType', false); await expect(accessor.canHandle(representation)).rejects.toThrow(UnsupportedMediaTypeHttpError); representation = new BasicRepresentation(data, INTERNAL_QUADS, false); metadata.contentType = INTERNAL_QUADS; diff --git a/test/unit/util/HeaderUtil.test.ts b/test/unit/util/HeaderUtil.test.ts index 707251436..7c9368927 100644 --- a/test/unit/util/HeaderUtil.test.ts +++ b/test/unit/util/HeaderUtil.test.ts @@ -1,4 +1,5 @@ import type { HttpResponse } from '../../../src/server/HttpResponse'; +import { BadRequestHttpError } from '../../../src/util/errors/BadRequestHttpError'; import { addHeader, parseAccept, @@ -213,6 +214,10 @@ describe('HeaderUtil', (): void => { contentTypePlain.parameters.test = 'value1'; expect(parseContentType('text/plain; charset=utf-8;test="value1"')).toEqual(contentTypePlain); }); + + it('errors on invalid content-types.', (): void => { + expect((): any => parseContentType('invalid type')).toThrow(BadRequestHttpError); + }); }); describe('#parseForwarded', (): void => { diff --git a/test/unit/util/ResourceUtil.test.ts b/test/unit/util/ResourceUtil.test.ts index a78377c6f..f177f9658 100644 --- a/test/unit/util/ResourceUtil.test.ts +++ b/test/unit/util/ResourceUtil.test.ts @@ -10,7 +10,7 @@ describe('ResourceUtil', (): void => { let representation: Representation; beforeEach(async(): Promise => { - representation = new BasicRepresentation('data', 'metadata'); + representation = new BasicRepresentation('data', 'meta/data'); }); describe('#updateModifiedDate', (): void => { @@ -55,7 +55,7 @@ describe('ResourceUtil', (): void => { it('ensures that original representation does not update when the clone is updated.', async(): Promise => { const res = await cloneRepresentation(representation); - res.metadata.contentType = 'typetype'; + res.metadata.contentType = 'type/type'; expect(representation.metadata.contentType).not.toBe(res.metadata.contentType); }); });