From a8602055e67b5d23ec70f3cc0dbaee6b4235fda4 Mon Sep 17 00:00:00 2001 From: Thomas Dupont Date: Mon, 14 Mar 2022 10:27:34 +0100 Subject: [PATCH] feat: Store content type parameters * feat: support storage and retrieval of content-type parameters * test: extra unit tests for parseContentTypeWithParameters * refactor: simplify set contentType() Co-authored-by: Joachim Van Herwegen * refactor: simplify for loop because of unique blankNodes Co-authored-by: Joachim Van Herwegen * refactor: ContentTypeParameter should be contentTypeParameter Co-authored-by: Joachim Van Herwegen * refactor: remove undefined type in favor of var? syntax Co-authored-by: Joachim Van Herwegen * refactor: use new parseContentType internally * chore: remove commented code * docs: code documentation line changed Co-authored-by: Joachim Van Herwegen * refactor: Check for faulty metadata in contentType rdf structure Co-authored-by: Joachim Van Herwegen * refactor: remove all instances of blanknodes Co-authored-by: Joachim Van Herwegen * refactor: use full contentType when parsing header Co-authored-by: Joachim Van Herwegen * refactor: use quads() method instead of store.getQuads() * refactor: .value needed for type correctness * feat: ReprMetadata constructor now supports full content-type string Co-authored-by: Joachim Van Herwegen --- src/http/input/metadata/ContentTypeParser.ts | 3 +- .../representation/RepresentationMetadata.ts | 79 ++++++++++++++++++- src/util/FetchUtil.ts | 4 +- src/util/HeaderUtil.ts | 33 ++++++-- src/util/Vocabularies.ts | 7 ++ .../input/body/SparqlUpdateBodyParser.test.ts | 4 +- .../input/metadata/ContentTypeParser.test.ts | 8 +- .../RepresentationMetadata.test.ts | 72 ++++++++++++++++- test/unit/util/HeaderUtil.test.ts | 23 +++++- 9 files changed, 210 insertions(+), 23 deletions(-) diff --git a/src/http/input/metadata/ContentTypeParser.ts b/src/http/input/metadata/ContentTypeParser.ts index d4cddd1fc..eb95ddb73 100644 --- a/src/http/input/metadata/ContentTypeParser.ts 
+++ b/src/http/input/metadata/ContentTypeParser.ts @@ -1,5 +1,4 @@ import type { HttpRequest } from '../../../server/HttpRequest'; -import { parseContentType } from '../../../util/HeaderUtil'; import type { RepresentationMetadata } from '../../representation/RepresentationMetadata'; import { MetadataParser } from './MetadataParser'; @@ -11,7 +10,7 @@ export class ContentTypeParser extends MetadataParser { public async handle(input: { request: HttpRequest; metadata: RepresentationMetadata }): Promise { const contentType = input.request.headers['content-type']; if (contentType) { - input.metadata.contentType = parseContentType(contentType).type; + input.metadata.contentType = contentType; } } } diff --git a/src/http/representation/RepresentationMetadata.ts b/src/http/representation/RepresentationMetadata.ts index 2cb3402a3..a09d83cd8 100644 --- a/src/http/representation/RepresentationMetadata.ts +++ b/src/http/representation/RepresentationMetadata.ts @@ -2,8 +2,10 @@ import { DataFactory, Store } from 'n3'; import type { BlankNode, DefaultGraph, Literal, NamedNode, Quad, Term } from 'rdf-js'; import { getLoggerFor } from '../../logging/LogUtil'; import { InternalServerError } from '../../util/errors/InternalServerError'; +import type { ContentType } from '../../util/HeaderUtil'; +import { parseContentType } from '../../util/HeaderUtil'; import { toNamedTerm, toObjectTerm, toCachedNamedNode, isTerm, toLiteral } from '../../util/TermUtil'; -import { CONTENT_TYPE, CONTENT_TYPE_TERM, CONTENT_LENGTH_TERM, XSD } from '../../util/Vocabularies'; +import { CONTENT_TYPE_TERM, CONTENT_LENGTH_TERM, XSD, SOLID_META, RDFS } from '../../util/Vocabularies'; import type { ResourceIdentifier } from './ResourceIdentifier'; import { isResourceIdentifier } from './ResourceIdentifier'; @@ -87,9 +89,10 @@ export class RepresentationMetadata { if (overrides) { if (typeof overrides === 'string') { - overrides = { [CONTENT_TYPE]: overrides }; + this.contentType = overrides; + } else { + 
this.setOverrides(overrides); } - this.setOverrides(overrides); } } @@ -304,6 +307,63 @@ export class RepresentationMetadata { return this; } + private setContentType(input?: ContentType | string): void { + // Make sure complete Content-Type RDF structure is gone + this.removeContentType(); + + if (!input) { + return; + } + + if (typeof input === 'string') { + input = parseContentType(input); + } + + for (const [ key, value ] of Object.entries(input.parameters)) { + const node = DataFactory.blankNode(); + this.addQuad(this.id, SOLID_META.terms.contentTypeParameter, node); + this.addQuad(node, RDFS.terms.label, key); + this.addQuad(node, SOLID_META.terms.value, value); + } + + // Set base content type string + this.set(CONTENT_TYPE_TERM, input.value); + } + + /** + * Parse the internal RDF structure to retrieve the Record with ContentType Parameters. + * @returns A {@link ContentType} object containing the value and optional parameters if there is one. + */ + private getContentType(): ContentType | undefined { + const value = this.get(CONTENT_TYPE_TERM)?.value; + if (!value) { + return; + } + const params = this.getAll(SOLID_META.terms.contentTypeParameter); + return { + value, + parameters: Object.fromEntries(params.map((param): [string, string] => { + const labels = this.store.getObjects(param, RDFS.terms.label, null); + const values = this.store.getObjects(param, SOLID_META.terms.value, null); + if (labels.length !== 1 || values.length !== 1) { + this.logger.error(`Detected invalid content-type metadata for ${this.id.value}`); + return [ 'invalid', '' ]; + } + return [ labels[0].value, values[0].value ]; + })), + }; + } + + private removeContentType(): void { + this.removeAll(CONTENT_TYPE_TERM); + const params = this.quads(this.id, SOLID_META.terms.contentTypeParameter); + for (const quad of params) { + const paramEntries = this.quads(quad.object as BlankNode); + this.store.removeQuads(paramEntries); + } + this.store.removeQuads(params); + } + // Syntactic sugar 
for common predicates /** @@ -314,7 +374,18 @@ export class RepresentationMetadata { } public set contentType(input) { - this.set(CONTENT_TYPE_TERM, input); + this.setContentType(input); + } + + /** + * Shorthand for the ContentType as an object (with parameters) + */ + public get contentTypeObject(): ContentType | undefined { + return this.getContentType(); + } + + public set contentTypeObject(contentType) { + this.setContentType(contentType); } /** diff --git a/src/util/FetchUtil.ts b/src/util/FetchUtil.ts index 5398dede5..f94f6672b 100644 --- a/src/util/FetchUtil.ts +++ b/src/util/FetchUtil.ts @@ -9,7 +9,6 @@ import { getLoggerFor } from '../logging/LogUtil'; import type { RepresentationConverter } from '../storage/conversion/RepresentationConverter'; import { INTERNAL_QUADS } from './ContentTypes'; import { BadRequestHttpError } from './errors/BadRequestHttpError'; -import { parseContentType } from './HeaderUtil'; const logger = getLoggerFor('FetchUtil'); @@ -58,10 +57,9 @@ Promise { logger.warn(`Missing content-type header from ${response.url}`); throw error; } - const contentTypeValue = parseContentType(contentType).type; // Try to convert to quads - const representation = new BasicRepresentation(body, contentTypeValue); + const representation = new BasicRepresentation(body, contentType); const preferences = { type: { [INTERNAL_QUADS]: 1 }}; return converter.handleSafe({ representation, identifier: { path: response.url }, preferences }); } diff --git a/src/util/HeaderUtil.ts b/src/util/HeaderUtil.ts index 996ba092a..590bf004b 100644 --- a/src/util/HeaderUtil.ts +++ b/src/util/HeaderUtil.ts @@ -99,6 +99,15 @@ export interface AcceptLanguage extends AcceptHeader { } */ export interface AcceptDatetime extends AcceptHeader { } +/** + * Contents of a HTTP Content-Type Header. + * Optional parameters Record is included. 
+ */ +export interface ContentType { + value: string; + parameters: Record; +} + // REUSED REGEXES const token = /^[a-zA-Z0-9!#$%&'*+-.^_`|~]+$/u; @@ -416,15 +425,27 @@ export function addHeader(response: HttpResponse, name: string, value: string | } /** - * Parses the Content-Type header. + * Parses the Content-Type header and also parses any parameters in the header. * - * @param contentType - The media type of the content-type header + * @param input - The Content-Type header string. * - * @returns The parsed media type of the content-type + * @throws {@link BadRequestHttpError} + * Thrown on invalid header syntax. + * + * @returns A {@link ContentType} object containing the value and optional parameters. */ -export function parseContentType(contentType: string): { type: string } { - const contentTypeValue = /^\s*([^;\s]*)/u.exec(contentType)![1]; - return { type: contentTypeValue }; +export function parseContentType(input: string): ContentType { + // Quoted strings could prevent split from having correct results + const { result, replacements } = transformQuotedStrings(input); + const [ value, ...params ] = result.split(';').map((str): string => str.trim()); + return parseParameters(params, replacements) + .reduce( + (prev, cur): ContentType => { + prev.parameters[cur.name] = cur.value; + return prev; + }, + { value, parameters: {}}, + ); } /** diff --git a/src/util/Vocabularies.ts b/src/util/Vocabularies.ts index d4515fc26..abde08e88 100644 --- a/src/util/Vocabularies.ts +++ b/src/util/Vocabularies.ts @@ -123,6 +123,10 @@ export const RDF = createUriAndTermNamespace('http://www.w3.org/1999/02/22-rdf-s 'type', ); +export const RDFS = createUriAndTermNamespace('http://www.w3.org/2000/01/rdf-schema#', + 'label', +); + export const SOLID = createUriAndTermNamespace('http://www.w3.org/ns/solid/terms#', 'deletes', 'inserts', @@ -148,6 +152,9 @@ export const SOLID_META = createUriAndTermNamespace('urn:npm:solid:community-ser 'ResponseMetadata', // This is used to 
identify templates that can be used for the representation of a resource 'template', + // This is used to store Content-Type Parameters + 'contentTypeParameter', + 'value', ); export const VANN = createUriAndTermNamespace('http://purl.org/vocab/vann/', diff --git a/test/unit/http/input/body/SparqlUpdateBodyParser.test.ts b/test/unit/http/input/body/SparqlUpdateBodyParser.test.ts index d571eed62..1f1ac0eb1 100644 --- a/test/unit/http/input/body/SparqlUpdateBodyParser.test.ts +++ b/test/unit/http/input/body/SparqlUpdateBodyParser.test.ts @@ -25,9 +25,9 @@ describe('A SparqlUpdateBodyParser', (): void => { input.metadata.contentType = 'text/plain'; await expect(bodyParser.canHandle(input)).rejects.toThrow(UnsupportedMediaTypeHttpError); input.metadata.contentType = 'application/sparql-update;charset=utf-8'; - await expect(bodyParser.canHandle(input)).rejects.toThrow(UnsupportedMediaTypeHttpError); + await expect(bodyParser.canHandle(input)).resolves.toBeUndefined(); input.metadata.contentType = 'application/sparql-update ; foo=bar'; - await expect(bodyParser.canHandle(input)).rejects.toThrow(UnsupportedMediaTypeHttpError); + await expect(bodyParser.canHandle(input)).resolves.toBeUndefined(); input.metadata.contentType = 'application/sparql-update'; await expect(bodyParser.canHandle(input)).resolves.toBeUndefined(); }); diff --git a/test/unit/http/input/metadata/ContentTypeParser.test.ts b/test/unit/http/input/metadata/ContentTypeParser.test.ts index 86c465822..b718ddb71 100644 --- a/test/unit/http/input/metadata/ContentTypeParser.test.ts +++ b/test/unit/http/input/metadata/ContentTypeParser.test.ts @@ -20,7 +20,13 @@ describe('A ContentTypeParser', (): void => { it('sets the given content-type as metadata.', async(): Promise => { request.headers['content-type'] = 'text/plain;charset=UTF-8'; await expect(parser.handle({ request, metadata })).resolves.toBeUndefined(); - expect(metadata.quads()).toHaveLength(1); + expect(metadata.quads()).toHaveLength(4); 
expect(metadata.contentType).toBe('text/plain'); + expect(metadata.contentTypeObject).toEqual({ + value: 'text/plain', + parameters: { + charset: 'UTF-8', + }, + }); }); }); diff --git a/test/unit/http/representation/RepresentationMetadata.test.ts b/test/unit/http/representation/RepresentationMetadata.test.ts index 5032f6f31..7ba58fc55 100644 --- a/test/unit/http/representation/RepresentationMetadata.test.ts +++ b/test/unit/http/representation/RepresentationMetadata.test.ts @@ -1,8 +1,9 @@ import 'jest-rdf'; +import type { BlankNode } from 'n3'; import { DataFactory } from 'n3'; import type { NamedNode, Quad } from 'rdf-js'; import { RepresentationMetadata } from '../../../../src/http/representation/RepresentationMetadata'; -import { CONTENT_TYPE } from '../../../../src/util/Vocabularies'; +import { CONTENT_TYPE, SOLID_META, RDFS } from '../../../../src/util/Vocabularies'; const { defaultGraph, literal, namedNode, quad } = DataFactory; // Helper functions to filter quads @@ -296,5 +297,74 @@ describe('A RepresentationMetadata', (): void => { metadata.add(CONTENT_TYPE, 'c/d'); expect((): any => metadata.contentType).toThrow(); }); + + it('has a shorthand for Content-Type as string.', async(): Promise => { + expect(metadata.contentType).toBeUndefined(); + expect(metadata.contentTypeObject).toBeUndefined(); + metadata.contentType = 'text/plain; charset=utf-8; test=value1'; + expect(metadata.contentTypeObject).toEqual({ + value: 'text/plain', + parameters: { + charset: 'utf-8', + test: 'value1', + }, + }); + }); + + it('has a shorthand for Content-Type as object.', async(): Promise => { + expect(metadata.contentType).toBeUndefined(); + expect(metadata.contentTypeObject).toBeUndefined(); + metadata.contentTypeObject = { + value: 'text/plain', + parameters: { + charset: 'utf-8', + test: 'value1', + }, + }; + expect(metadata.contentTypeObject).toEqual({ + value: 'text/plain', + parameters: { + charset: 'utf-8', + test: 'value1', + }, + }); + 
expect(metadata.contentType).toBe('text/plain'); + }); + + it('can properly clear the Content-Type parameters explicitly.', async(): Promise => { + expect(metadata.contentType).toBeUndefined(); + expect(metadata.contentTypeObject).toBeUndefined(); + metadata.contentType = 'text/plain; charset=utf-8; test=value1'; + metadata.contentType = undefined; + expect(metadata.contentType).toBeUndefined(); + expect(metadata.contentTypeObject).toBeUndefined(); + expect(metadata.quads(null, SOLID_META.terms.contentTypeParameter, null, null)).toHaveLength(0); + expect(metadata.quads(null, SOLID_META.terms.value, null, null)).toHaveLength(0); + expect(metadata.quads(null, RDFS.terms.label, null, null)).toHaveLength(0); + }); + + it('can properly clear the Content-Type parameters implicitly.', async(): Promise => { + expect(metadata.contentType).toBeUndefined(); + expect(metadata.contentTypeObject).toBeUndefined(); + metadata.contentType = 'text/plain; charset=utf-8; test=value1'; + metadata.contentType = 'text/turtle'; + expect(metadata.contentType).toBe('text/turtle'); + expect(metadata.contentTypeObject).toEqual({ + value: 'text/turtle', + parameters: {}, + }); + expect(metadata.quads(null, SOLID_META.terms.contentTypeParameter, null, null)).toHaveLength(0); + expect(metadata.quads(null, SOLID_META.terms.value, null, null)).toHaveLength(0); + expect(metadata.quads(null, RDFS.terms.label, null, null)).toHaveLength(0); + }); + + it('can return invalid parameters when too many quads are present.', async(): Promise => { + expect(metadata.contentType).toBeUndefined(); + expect(metadata.contentTypeObject).toBeUndefined(); + metadata.contentType = 'text/plain; charset=utf-8; test=value1'; + const param = metadata.quads(null, SOLID_META.terms.value)[0].subject; + metadata.addQuad(param as BlankNode, SOLID_META.terms.value, 'anomaly'); + expect(metadata.contentTypeObject?.parameters).toMatchObject({ invalid: '' }); + }); }); }); diff --git a/test/unit/util/HeaderUtil.test.ts 
b/test/unit/util/HeaderUtil.test.ts index 2e5c124fd..707251436 100644 --- a/test/unit/util/HeaderUtil.test.ts +++ b/test/unit/util/HeaderUtil.test.ts @@ -190,16 +190,31 @@ describe('HeaderUtil', (): void => { describe('#parseContentType', (): void => { const contentTypeTurtle = 'text/turtle'; + const contentTypePlain: any = { + value: 'text/plain', + parameters: { + charset: 'utf-8', + }, + }; it('handles single content-type parameter (with leading and trailing whitespaces).', (): void => { - expect(parseContentType('text/turtle').type).toEqual(contentTypeTurtle); - expect(parseContentType('text/turtle ').type).toEqual(contentTypeTurtle); - expect(parseContentType(' text/turtle').type).toEqual(contentTypeTurtle); + expect(parseContentType('text/turtle').value).toEqual(contentTypeTurtle); + expect(parseContentType('text/turtle ').value).toEqual(contentTypeTurtle); + expect(parseContentType(' text/turtle').value).toEqual(contentTypeTurtle); + expect(parseContentType('text/plain; charset=utf-8')).toEqual(contentTypePlain); + expect(parseContentType(' text/plain; charset=utf-8')).toEqual(contentTypePlain); + expect(parseContentType('text/plain ; charset=utf-8')).toEqual(contentTypePlain); + expect(parseContentType(' text/plain ; charset=utf-8')).toEqual(contentTypePlain); + expect(parseContentType(' text/plain ; charset="utf-8"')).toEqual(contentTypePlain); + expect(parseContentType(' text/plain ; charset = "utf-8"')).toEqual(contentTypePlain); }); it('handles multiple content-type parameters.', (): void => { - expect(parseContentType('text/turtle; charset=UTF-8').type).toEqual(contentTypeTurtle); + expect(parseContentType('text/turtle; charset=UTF-8').value).toEqual(contentTypeTurtle); + contentTypePlain.parameters.test = 'value1'; + expect(parseContentType('text/plain; charset=utf-8;test="value1"')).toEqual(contentTypePlain); }); }); + describe('#parseForwarded', (): void => { it('handles an empty set of headers.', (): void => { expect(parseForwarded({})).toEqual({});