From 9d9f7df5d18b035b036e78ae79019f79b86d9818 Mon Sep 17 00:00:00 2001 From: Joachim Van Herwegen Date: Mon, 6 Jul 2020 12:05:19 +0200 Subject: [PATCH] feat: Fully support Accept* headers --- src/ldp/http/AcceptPreferenceParser.ts | 62 +++++ src/ldp/http/SimplePreferenceParser.ts | 46 ---- src/util/AcceptParser.ts | 219 ++++++++++++++++++ .../AuthenticatedLdpHandler.test.ts | 4 +- test/integration/RequestParser.test.ts | 4 +- ...test.ts => AcceptPreferenceParser.test.ts} | 13 +- test/unit/util/AcceptParser.test.ts | 70 ++++++ 7 files changed, 364 insertions(+), 54 deletions(-) create mode 100644 src/ldp/http/AcceptPreferenceParser.ts delete mode 100644 src/ldp/http/SimplePreferenceParser.ts create mode 100644 src/util/AcceptParser.ts rename test/unit/ldp/http/{SimplePreferenceParser.test.ts => AcceptPreferenceParser.test.ts} (70%) create mode 100644 test/unit/util/AcceptParser.test.ts diff --git a/src/ldp/http/AcceptPreferenceParser.ts b/src/ldp/http/AcceptPreferenceParser.ts new file mode 100644 index 000000000..9d1f9ef0c --- /dev/null +++ b/src/ldp/http/AcceptPreferenceParser.ts @@ -0,0 +1,62 @@ +import { HttpRequest } from '../../server/HttpRequest'; +import { PreferenceParser } from './PreferenceParser'; +import { RepresentationPreference } from '../representation/RepresentationPreference'; +import { RepresentationPreferences } from '../representation/RepresentationPreferences'; +import { + AcceptHeader, + parseAccept, + parseAcceptCharset, + parseAcceptEncoding, + parseAcceptLanguage, +} from '../../util/AcceptParser'; + +/** + * Extracts preferences from the accept-* headers from an incoming {@link HttpRequest}. + * Supports Accept, Accept-Charset, Accept-Encoding, Accept-Language and Accept-DateTime. 
+ */ +export class AcceptPreferenceParser extends PreferenceParser { + public constructor() { + super(); + } + + public async canHandle(): Promise<void> { + return undefined; + } + + public async handle(input: HttpRequest): Promise<RepresentationPreferences> { + const result: RepresentationPreferences = {}; + const headers: { [T in keyof RepresentationPreferences]: { val: string; func: (input: string) => AcceptHeader[] }} = { + type: { val: input.headers.accept, func: parseAccept }, + charset: { val: input.headers['accept-charset'] as string, func: parseAcceptCharset }, + encoding: { val: input.headers['accept-encoding'] as string, func: parseAcceptEncoding }, + language: { val: input.headers['accept-language'], func: parseAcceptLanguage }, + }; + Object.keys(headers).forEach((key: keyof RepresentationPreferences): void => { + const preferences = this.parseHeader(headers[key].val, headers[key].func); + if (preferences.length > 0) { + result[key] = preferences; + } + }); + + // Accept-DateTime is currently specified to simply have a datetime as value + if (input.headers['accept-datetime']) { + result.datetime = [{ value: input.headers['accept-datetime'] as string, weight: 1 }]; + } + + return result; + } + + /** + * Converts a header string using the given parse function to {@link RepresentationPreference}[]. + * @param input - Input header string. + * @param parseFunction - Function that converts header string to {@link AcceptHeader}. + * + * @returns A list of {@link RepresentationPreference}. Returns an empty list if input was not defined. 
+ */ + private parseHeader(input: string, parseFunction: (input: string) => AcceptHeader[]): RepresentationPreference[] { + if (!input) { + return []; + } + return parseFunction(input).map((accept): RepresentationPreference => ({ value: accept.range, weight: accept.weight })); + } +} diff --git a/src/ldp/http/SimplePreferenceParser.ts b/src/ldp/http/SimplePreferenceParser.ts deleted file mode 100644 index 0700994b8..000000000 --- a/src/ldp/http/SimplePreferenceParser.ts +++ /dev/null @@ -1,46 +0,0 @@ -import { HttpRequest } from '../../server/HttpRequest'; -import { PreferenceParser } from './PreferenceParser'; -import { RepresentationPreference } from '../representation/RepresentationPreference'; -import { RepresentationPreferences } from '../representation/RepresentationPreferences'; - -/** - * Extracts preferences from the accept-* headers from an incoming {@link HttpRequest}. - * Parsing of header strings is done naively. - */ -export class SimplePreferenceParser extends PreferenceParser { - public constructor() { - super(); - } - - public async canHandle(): Promise<void> { - return undefined; - } - - public async handle(input: HttpRequest): Promise<RepresentationPreferences> { - const type = this.parseHeader(input.headers.accept); - const charset = this.parseHeader(input.headers['accept-charset'] as string); - const language = this.parseHeader(input.headers['accept-language']); - - // Datetime can have commas so requires separate rules - let datetime; - if (input.headers['accept-datetime']) { - datetime = [{ value: input.headers['accept-datetime'] as string, weight: 1 }]; - } - - return { type, charset, datetime, language }; - } - - private parseHeader(header: string): RepresentationPreference[] { - if (!header) { - return undefined; - } - - return header.split(',').map((preference): RepresentationPreference => { - const parts = preference.split(';'); - if (parts.length === 1) { - return { value: parts[0].trim(), weight: 1 }; - } - return { value: parts[0].trim(), weight: 
parseFloat(parts[1].trim().slice('q='.length)) }; - }); - } -} diff --git a/src/util/AcceptParser.ts b/src/util/AcceptParser.ts new file mode 100644 index 000000000..b76fef840 --- /dev/null +++ b/src/util/AcceptParser.ts @@ -0,0 +1,219 @@ +// BNF based on https://tools.ietf.org/html/rfc7231 +// +// Accept = #( media-range [ accept-params ] ) +// Accept-Charset = 1#( ( charset / "*" ) [ weight ] ) +// Accept-Encoding = #( codings [ weight ] ) +// Accept-Language = 1#( language-range [ weight ] ) +// +// Content-Type = media-type +// media-type = type "/" subtype *( OWS ";" OWS parameter ) +// +// media-range = ( "*/*" +// / ( type "/" "*" ) +// / ( type "/" subtype ) +// ) *( OWS ";" OWS parameter ) ; media type parameters +// accept-params = weight *( accept-ext ) +// accept-ext = OWS ";" OWS token [ "=" ( token / quoted-string ) ] ; extension parameters +// +// weight = OWS ";" OWS "q=" qvalue +// qvalue = ( "0" [ "." 0*3DIGIT ] ) +// / ( "1" [ "." 0*3("0") ] ) +// +// type = token +// subtype = token +// parameter = token "=" ( token / quoted-string ) +// +// quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE +// qdtext = HTAB / SP / %x21 / %x23-5B / %x5D-7E / obs-text +// obs-text = %x80-FF +// quoted-pair = "\" ( HTAB / SP / VCHAR / obs-text ) +// +// charset = token +// +// codings = content-coding / "identity" / "*" +// content-coding = token +// +// language-range = (1*8ALPHA *("-" 1*8alphanum)) / "*" +// alphanum = ALPHA / DIGIT +// +// Delimiters are chosen from the set of US-ASCII visual characters not allowed in a token (DQUOTE and "(),/:;<=>?@[\]{}"). +// token = 1*tchar +// tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" +// / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~" +// / DIGIT / ALPHA +// ; any VCHAR, except delimiters +// + +// INTERFACES +/** + * General interface for all Accept* headers. + */ +export interface AcceptHeader { + /** Requested range. Can be a specific value or `*`, matching all. 
*/ + range: string; + /** Weight of the preference [0, 1]. */ + weight: number; +} + +/** + * Contents of an HTTP Accept header. + * Range is type/subtype. Both can be `*`. + */ +export interface Accept extends AcceptHeader { + parameters: { + /** Media type parameters. These are the parameters that came before the q value. */ + mediaType: { [key: string]: string }; + /** Extension parameters. These are the parameters that came after the q value. Value will be an empty string if there was none. */ + extension: { [key: string]: string }; + }; +} + +/** + * Contents of an HTTP Accept-Charset header. + */ +export interface AcceptCharset extends AcceptHeader { } + +/** + * Contents of an HTTP Accept-Encoding header. + */ +export interface AcceptEncoding extends AcceptHeader { } + +/** + * Contents of an HTTP Accept-Language header. + */ +export interface AcceptLanguage extends AcceptHeader { } + +// HELPER FUNCTIONS +/** + * Replaces all double quoted strings in the input string with `"0"`, `"1"`, etc. + * @param input - The Accept header string. + * + * @returns The transformed string and a map with keys `"0"`, etc. and values the original string that was there. + */ +const transformQuotedStrings = (input: string): { result: string; replacements: { [id: string]: string } } => { + let idx = 0; + const replacements: { [id: string]: string } = {}; + const result = input.replace(/"(?:[^"\\]|\\.)*"/gu, (match): string => { + const replacement = `"${idx}"`; + replacements[replacement] = match; + idx += 1; + return replacement; + }); + return { result, replacements }; +}; + +const splitAndClean = (input: string): string[] => + input.split(',') + .map((part): string => part.trim()) + .filter((part): boolean => part.length > 0); + +/** + * Parses a single media range with corresponding parameters from an Accept header. + * For every parameter value that is a double quoted string, + * we check if it is a key in the replacements map. 
+ * If yes the value from the map gets inserted instead. + * @param part - A string corresponding to a media range and its corresponding parameters. + * @param replacements - The double quoted strings that need to be replaced. + * + * @returns {@link Accept} object corresponding to the header string. + */ +const parseAcceptPart = (part: string, replacements: { [id: string]: string }): Accept => { + const [ range, ...parameters ] = part.split(';').map((param): string => param.trim()); + let weight = 1; + const mediaTypeParams: { [key: string]: string } = {}; + const extensionParams: { [key: string]: string } = {}; + let map = mediaTypeParams; + parameters.forEach((param): void => { + const [ name, value ] = param.split('='); + let actualValue = value; + + if (value && value.length > 0 && value.startsWith('"') && replacements[value]) { + actualValue = replacements[value]; + } + + if (name.toLowerCase() === 'q') { + // The "q" literal is case-insensitive (ABNF), so "Q=0.5" is a weight too; extension parameters appear after it + map = extensionParams; + weight = parseFloat(actualValue); + } else { + // Value is optional for extension parameters + map[name] = actualValue || ''; + } + }); + + return { + range, + weight, + parameters: { + mediaType: mediaTypeParams, + extension: extensionParams, + }, + }; +}; + +/** + * Parses an Accept-* header where each part is only a value and a weight, so roughly /.*(q=.*)?/ separated by commas. + * @param input - Input header string. + * + * @returns An array of ranges and weights. 
+ */ +const parseNoParameters = (input: string): { range: string; weight: number }[] => { + const parts = splitAndClean(input); + + return parts.map((part): { range: string; weight: number } => { + const [ range, qvalue ] = part.split(';').map((param): string => param.trim()); + const result = { range, weight: 1 }; + if (qvalue) { + result.weight = parseFloat(qvalue.split('=')[1]); + } + return result; + }).sort((left, right): number => right.weight - left.weight); +}; + +// EXPORTED FUNCTIONS + +/** + * Parses an Accept header string. + * No validation is done so this assumes a valid input string. + * + * @param input - The Accept header string. + * + * @returns An array of {@link Accept} objects, sorted by weight. + */ +export const parseAccept = (input: string): Accept[] => { + // Quoted strings could prevent split from having correct results + const { result, replacements } = transformQuotedStrings(input); + return splitAndClean(result) + .map((part): Accept => parseAcceptPart(part, replacements)) + .sort((left, right): number => right.weight - left.weight); +}; + +/** + * Parses an Accept-Charset header string. + * No validation is done so this assumes a valid input string. + * + * @param input - The Accept-Charset header string. + * + * @returns An array of {@link AcceptCharset} objects, sorted by weight. + */ +export const parseAcceptCharset = (input: string): AcceptCharset[] => parseNoParameters(input); + +/** + * Parses an Accept-Encoding header string. + * No validation is done so this assumes a valid input string. + * + * @param input - The Accept-Encoding header string. + * + * @returns An array of {@link AcceptEncoding} objects, sorted by weight. + */ +export const parseAcceptEncoding = (input: string): AcceptEncoding[] => parseNoParameters(input); + +/** + * Parses an Accept-Language header string. + * No validation is done so this assumes a valid input string. + * + * @param input - The Accept-Language header string. 
+ * + * @returns An array of {@link AcceptLanguage} objects, sorted by weight. + */ +export const parseAcceptLanguage = (input: string): AcceptLanguage[] => parseNoParameters(input); diff --git a/test/integration/AuthenticatedLdpHandler.test.ts b/test/integration/AuthenticatedLdpHandler.test.ts index c87169b3f..25069ac72 100644 --- a/test/integration/AuthenticatedLdpHandler.test.ts +++ b/test/integration/AuthenticatedLdpHandler.test.ts @@ -1,3 +1,4 @@ +import { AcceptPreferenceParser } from '../../src/ldp/http/AcceptPreferenceParser'; import { AuthenticatedLdpHandler } from '../../src/ldp/AuthenticatedLdpHandler'; import { CompositeAsyncHandler } from '../../src/util/CompositeAsyncHandler'; import { EventEmitter } from 'events'; @@ -11,7 +12,6 @@ import { SimpleDeleteOperationHandler } from '../../src/ldp/operations/SimpleDel import { SimpleGetOperationHandler } from '../../src/ldp/operations/SimpleGetOperationHandler'; import { SimplePermissionsExtractor } from '../../src/ldp/permissions/SimplePermissionsExtractor'; import { SimplePostOperationHandler } from '../../src/ldp/operations/SimplePostOperationHandler'; -import { SimplePreferenceParser } from '../../src/ldp/http/SimplePreferenceParser'; import { SimpleRequestParser } from '../../src/ldp/http/SimpleRequestParser'; import { SimpleResourceStore } from '../../src/storage/SimpleResourceStore'; import { SimpleResponseWriter } from '../../src/ldp/http/SimpleResponseWriter'; @@ -25,7 +25,7 @@ describe('An AuthenticatedLdpHandler with instantiated handlers', (): void => { beforeEach(async(): Promise => { const requestParser = new SimpleRequestParser({ targetExtractor: new SimpleTargetExtractor(), - preferenceParser: new SimplePreferenceParser(), + preferenceParser: new AcceptPreferenceParser(), bodyParser: new SimpleBodyParser(), }); diff --git a/test/integration/RequestParser.test.ts b/test/integration/RequestParser.test.ts index a2973b4b7..d631b511f 100644 --- a/test/integration/RequestParser.test.ts +++ 
b/test/integration/RequestParser.test.ts @@ -1,7 +1,7 @@ +import { AcceptPreferenceParser } from '../../src/ldp/http/AcceptPreferenceParser'; import arrayifyStream from 'arrayify-stream'; import { HttpRequest } from '../../src/server/HttpRequest'; import { SimpleBodyParser } from '../../src/ldp/http/SimpleBodyParser'; -import { SimplePreferenceParser } from '../../src/ldp/http/SimplePreferenceParser'; import { SimpleRequestParser } from '../../src/ldp/http/SimpleRequestParser'; import { SimpleTargetExtractor } from '../../src/ldp/http/SimpleTargetExtractor'; import streamifyArray from 'streamify-array'; @@ -11,7 +11,7 @@ import { namedNode, triple } from '@rdfjs/data-model'; describe('A SimpleRequestParser with simple input parsers', (): void => { const targetExtractor = new SimpleTargetExtractor(); const bodyParser = new SimpleBodyParser(); - const preferenceParser = new SimplePreferenceParser(); + const preferenceParser = new AcceptPreferenceParser(); const requestParser = new SimpleRequestParser({ targetExtractor, bodyParser, preferenceParser }); it('can parse an incoming request.', async(): Promise => { diff --git a/test/unit/ldp/http/SimplePreferenceParser.test.ts b/test/unit/ldp/http/AcceptPreferenceParser.test.ts similarity index 70% rename from test/unit/ldp/http/SimplePreferenceParser.test.ts rename to test/unit/ldp/http/AcceptPreferenceParser.test.ts index f4fd48e20..5189c662f 100644 --- a/test/unit/ldp/http/SimplePreferenceParser.test.ts +++ b/test/unit/ldp/http/AcceptPreferenceParser.test.ts @@ -1,8 +1,8 @@ +import { AcceptPreferenceParser } from '../../../../src/ldp/http/AcceptPreferenceParser'; import { HttpRequest } from '../../../../src/server/HttpRequest'; -import { SimplePreferenceParser } from '../../../../src/ldp/http/SimplePreferenceParser'; -describe('A SimplePreferenceParser', (): void => { - const preferenceParser = new SimplePreferenceParser(); +describe('An AcceptPreferenceParser', (): void => { + const preferenceParser = new 
AcceptPreferenceParser(); it('can handle all input.', async(): Promise<void> => { await expect(preferenceParser.canHandle()).resolves.toBeUndefined(); @@ -14,7 +14,7 @@ describe('A SimplePreferenceParser', (): void => { it('parses accept headers.', async(): Promise<void> => { await expect(preferenceParser.handle({ headers: { accept: 'audio/*; q=0.2, audio/basic' }} as HttpRequest)) - .resolves.toEqual({ type: [{ value: 'audio/*', weight: 0.2 }, { value: 'audio/basic', weight: 1 }]}); + .resolves.toEqual({ type: [{ value: 'audio/basic', weight: 1 }, { value: 'audio/*', weight: 0.2 }]}); }); it('parses accept-charset headers.', async(): Promise<void> => { @@ -27,6 +27,11 @@ describe('A SimplePreferenceParser', (): void => { .resolves.toEqual({ datetime: [{ value: 'Tue, 20 Mar 2001 20:35:00 GMT', weight: 1 }]}); }); + it('parses accept-encoding headers.', async(): Promise<void> => { + await expect(preferenceParser.handle({ headers: { 'accept-encoding': 'gzip;q=1.0, identity; q=0.5, *;q=0' }} as unknown as HttpRequest)) + .resolves.toEqual({ encoding: [{ value: 'gzip', weight: 1 }, { value: 'identity', weight: 0.5 }, { value: '*', weight: 0 }]}); + }); + it('parses accept-language headers.', async(): Promise<void> => { await expect(preferenceParser.handle({ headers: { 'accept-language': 'da, en-gb;q=0.8, en;q=0.7' }} as HttpRequest)) .resolves.toEqual({ language: [{ value: 'da', weight: 1 }, { value: 'en-gb', weight: 0.8 }, { value: 'en', weight: 0.7 }]}); diff --git a/test/unit/util/AcceptParser.test.ts b/test/unit/util/AcceptParser.test.ts new file mode 100644 index 000000000..fdfbac4ed --- /dev/null +++ b/test/unit/util/AcceptParser.test.ts @@ -0,0 +1,70 @@ +import { parseAccept, parseAcceptCharset, parseAcceptEncoding, parseAcceptLanguage } from '../../../src/util/AcceptParser'; + +describe('AcceptParser', (): void => { + describe('parseAccept function', (): void => { + it('parses empty Accept headers.', async(): Promise<void> => { + expect(parseAccept('')).toEqual([]); + }); + + it('parses Accept headers with a single 
entry.', async(): Promise<void> => { + expect(parseAccept('audio/basic')).toEqual([ + { range: 'audio/basic', weight: 1, parameters: { mediaType: {}, extension: {}}}, + ]); + }); + + it('parses Accept headers with multiple entries.', async(): Promise<void> => { + expect(parseAccept('audio/*; q=0.2, audio/basic')).toEqual([ + { range: 'audio/basic', weight: 1, parameters: { mediaType: {}, extension: {}}}, + { range: 'audio/*', weight: 0.2, parameters: { mediaType: {}, extension: {}}}, + ]); + }); + + it('parses complex Accept headers.', async(): Promise<void> => { + expect(parseAccept('text/html;q=0.7, text/html;level=1, text/html;level=2;q=0.4,text/x-dvi; q=.8; mxb=100000; mxt')).toEqual([ + { range: 'text/html', weight: 1, parameters: { mediaType: { level: '1' }, extension: {}}}, + { range: 'text/x-dvi', weight: 0.8, parameters: { mediaType: {}, extension: { mxb: '100000', mxt: '' }}}, + { range: 'text/html', weight: 0.7, parameters: { mediaType: {}, extension: {}}}, + { range: 'text/html', weight: 0.4, parameters: { mediaType: { level: '2' }, extension: {}}}, + ]); + }); + + it('parses Accept headers with double quoted values.', async(): Promise<void> => { + expect(parseAccept('audio/basic; param1="val" ; q=0.5 ;param2="\\\\\\"valid"')).toEqual([ + { range: 'audio/basic', weight: 0.5, parameters: { mediaType: { param1: '"val"' }, extension: { param2: '"\\\\\\"valid"' }}}, + ]); + }); + }); + + describe('parseCharset function', (): void => { + it('parses Accept-Charset headers.', async(): Promise<void> => { + expect(parseAcceptCharset('iso-8859-5, unicode-1-1;q=0.8')).toEqual([ + { range: 'iso-8859-5', weight: 1 }, + { range: 'unicode-1-1', weight: 0.8 }, + ]); + }); + }); + + describe('parseEncoding function', (): void => { + it('parses empty Accept-Encoding headers.', async(): Promise<void> => { + expect(parseAcceptEncoding('')).toEqual([]); + }); + + it('parses Accept-Encoding headers.', async(): Promise<void> => { + expect(parseAcceptEncoding('gzip;q=1.0, identity; q=0.5, *;q=0')).toEqual([ + { range: 
'gzip', weight: 1 }, + { range: 'identity', weight: 0.5 }, + { range: '*', weight: 0 }, + ]); + }); + }); + + describe('parseLanguage function', (): void => { + it('parses Accept-Language headers.', async(): Promise<void> => { + expect(parseAcceptLanguage('da, en-gb;q=0.8, en;q=0.7')).toEqual([ + { range: 'da', weight: 1 }, + { range: 'en-gb', weight: 0.8 }, + { range: 'en', weight: 0.7 }, + ]); + }); + }); +});