feat: Track binary size of resources when possible

This commit is contained in:
Joachim Van Herwegen 2023-10-02 13:40:19 +02:00
parent 3e9adef4cf
commit 71e55690f3
15 changed files with 194 additions and 35 deletions

View File

@ -21,6 +21,7 @@
"Accept-Post",
"Accept-Put",
"Allow",
"Content-Range",
"ETag",
"Last-Modified",
"Link",

View File

@ -1,6 +1,8 @@
import { getLoggerFor } from '../../../logging/LogUtil';
import type { HttpResponse } from '../../../server/HttpResponse';
import { addHeader } from '../../../util/HeaderUtil';
import { SOLID_HTTP } from '../../../util/Vocabularies';
import { termToInt } from '../../../util/QuadUtil';
import { POSIX, SOLID_HTTP } from '../../../util/Vocabularies';
import type { RepresentationMetadata } from '../../representation/RepresentationMetadata';
import { MetadataWriter } from './MetadataWriter';
@ -10,16 +12,39 @@ import { MetadataWriter } from './MetadataWriter';
* According to the RFC, this is incorrect,
* but is all we can do as long as we don't know the full length of the representation in advance.
* For the same reason, the total length of the representation will always be `*`.
*
* This class also adds the content-length header.
* This will contain either the full size for standard requests,
* or the size of the slice for range requests.
*/
export class RangeMetadataWriter extends MetadataWriter {
  protected readonly logger = getLoggerFor(this);

  /**
   * Adds the `Content-Range` and `Content-Length` headers based on the metadata.
   *
   * - Without a range unit, only `Content-Length` is written, and only if the `posix:size` is known.
   * - With a range unit, `Content-Range` is always written, using `*` for every unknown value.
   *   `Content-Length` is added when both boundaries of the slice could be determined.
   *
   * @param input - The response to add the headers to, and the metadata describing the representation.
   */
  public async handle(input: { response: HttpResponse; metadata: RepresentationMetadata }): Promise<void> {
    const size = termToInt(input.metadata.get(POSIX.terms.size));
    const unit = input.metadata.get(SOLID_HTTP.terms.unit)?.value;
    if (!unit) {
      // Not a range response: the full size, if known, is the content length
      if (typeof size === 'number') {
        addHeader(input.response, 'Content-Length', `${size}`);
      }
      return;
    }

    // A negative start counts from the end of the resource, so it can only be resolved if the size is known
    let start = termToInt(input.metadata.get(SOLID_HTTP.terms.start));
    if (typeof start === 'number' && start < 0 && typeof size === 'number') {
      start = size + start;
    }

    // A missing end means the slice runs until the last position, which is `size - 1` as `end` is inclusive
    let end = termToInt(input.metadata.get(SOLID_HTTP.terms.end));
    if (typeof end !== 'number' && typeof size === 'number') {
      end = size - 1;
    }

    const rangeHeader = `${unit} ${start ?? '*'}-${end ?? '*'}/${size ?? '*'}`;
    addHeader(input.response, 'Content-Range', rangeHeader);
    if (typeof start === 'number' && typeof end === 'number') {
      // Both boundaries are inclusive
      addHeader(input.response, 'Content-Length', `${end - start + 1}`);
    } else {
      this.logger.warn(`Generating invalid content-range header due to missing size information: ${rangeHeader}`);
    }
  }
}

View File

@ -5,9 +5,10 @@ import { getLoggerFor } from '../logging/LogUtil';
import { InternalServerError } from '../util/errors/InternalServerError';
import { RangeNotSatisfiedHttpError } from '../util/errors/RangeNotSatisfiedHttpError';
import { guardStream } from '../util/GuardedStream';
import { termToInt } from '../util/QuadUtil';
import { SliceStream } from '../util/SliceStream';
import { toLiteral } from '../util/TermUtil';
import { SOLID_HTTP, XSD } from '../util/Vocabularies';
import { POSIX, SOLID_HTTP, XSD } from '../util/Vocabularies';
import type { Conditions } from './Conditions';
import { PassthroughStore } from './PassthroughStore';
import type { ResourceStore } from './ResourceStore';
@ -51,10 +52,11 @@ export class BinarySliceResourceStore<T extends ResourceStore = ResourceStore> e
}
try {
const size = termToInt(result.metadata.get(POSIX.terms.size));
// The reason we don't determine the object mode based on the object mode of the parent stream
// is that `guardedStreamFrom` does not create object streams when inputting streams/buffers.
// Something to potentially update in the future.
result.data = guardStream(new SliceStream(result.data, { start, end, objectMode: false }));
result.data = guardStream(new SliceStream(result.data, { start, end, size, objectMode: false }));
} catch (error: unknown) {
// Creating the slice stream can throw an error if some of the parameters are unacceptable.
// Need to make sure the stream is closed in that case.

View File

@ -32,6 +32,9 @@ export interface DataAccessor {
/**
* Returns the metadata corresponding to the identifier.
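* If possible, it is suggested to add a `posix:size` triple to the metadata indicating the binary size.
* This size is required to serve suffix range requests (such as `bytes=-4`),
* and allows the total size to be reported in the `Content-Length` and `Content-Range` headers.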
*
* @param identifier - Identifier for which the metadata is requested.
*/
getMetadata: (identifier: ResourceIdentifier) => Promise<RepresentationMetadata>;

View File

@ -8,6 +8,8 @@ import { NotFoundHttpError } from '../../util/errors/NotFoundHttpError';
import type { Guarded } from '../../util/GuardedStream';
import type { IdentifierStrategy } from '../../util/identifiers/IdentifierStrategy';
import { guardedStreamFrom } from '../../util/StreamUtil';
import { POSIX } from '../../util/Vocabularies';
import { isInternalContentType } from '../conversion/ConversionUtil';
import type { DataAccessor } from './DataAccessor';
interface DataEntry {
@ -59,9 +61,17 @@ export class InMemoryDataAccessor implements DataAccessor, SingleThreaded {
/**
 * Stores a copy of the data stream, together with the metadata, as an entry of the parent of the identifier.
 * For binary data, a `posix:size` value containing the size in bytes is set on the metadata before storing it.
 *
 * @param identifier - Identifier of the document to write.
 * @param data - Data stream to store. The stream is fully drained.
 * @param metadata - Metadata to store with the data. The size is set on this object.
 */
public async writeDocument(identifier: ResourceIdentifier, data: Guarded<Readable>, metadata: RepresentationMetadata):
Promise<void> {
  const parent = this.getParentEntry(identifier);
  // Drain original stream and create copy
  const dataArray = await arrayifyStream(data);

  // Only add the size for binary streams, which are all streams that do not have an internal type.
  if (metadata.contentType && !isInternalContentType(metadata.contentType)) {
    // String chunks have to be measured in bytes: `length` would count UTF-16 code units instead,
    // which differs from the byte size as soon as the text contains multi-byte characters.
    const size = dataArray.reduce<number>((total, chunk: Buffer | string): number =>
      total + (typeof chunk === 'string' ? Buffer.byteLength(chunk) : chunk.length), 0);
    metadata.set(POSIX.terms.size, `${size}`);
  }

  parent.entries[identifier.path] = {
    data: dataArray,
    metadata,
  };
}

View File

@ -5,6 +5,7 @@ import type { ValuePreferences } from '../../http/representation/RepresentationP
import { getLoggerFor } from '../../logging/LogUtil';
import { BadRequestHttpError } from '../../util/errors/BadRequestHttpError';
import { NotImplementedHttpError } from '../../util/errors/NotImplementedHttpError';
import { POSIX } from '../../util/Vocabularies';
import { cleanPreferences, getBestPreference, getTypeWeight, preferencesToString } from './ConversionUtil';
import type { RepresentationConverterArgs } from './RepresentationConverter';
import { RepresentationConverter } from './RepresentationConverter';
@ -100,6 +101,13 @@ export class ChainedConverter extends RepresentationConverter {
args.preferences = { type: { [outTypes[i]]: 1 }};
args.representation = await match.converters[i].handle(args);
}
// For now, we assume any kind of conversion invalidates the stored byte length.
// In the future, we could let converters handle this individually, as some might know the size of the result.
if (match.converters.length > 0) {
args.representation.metadata.removeAll(POSIX.terms.size);
}
return args.representation;
}

View File

@ -3,7 +3,7 @@ import type { NamedNode } from '@rdfjs/types';
import arrayifyStream from 'arrayify-stream';
import type { ParserOptions } from 'n3';
import { StreamParser, StreamWriter } from 'n3';
import type { Quad } from 'rdf-js';
import type { Quad, Term } from 'rdf-js';
import type { Guarded } from './GuardedStream';
import { guardedStreamFrom, pipeSafely } from './StreamUtil';
import { toNamedTerm } from './TermUtil';
@ -45,6 +45,18 @@ export function uniqueQuads(quads: Quad[]): Quad[] {
}, []);
}
/**
 * Converts a term to an integer by parsing its value.
 * Returns undefined if the term was undefined,
 * or if its value could not be parsed as an integer, so callers never receive `NaN`.
 *
 * @param term - Term to parse.
 * @param radix - Radix to use when parsing. Default is 10.
 */
export function termToInt(term?: Term, radix = 10): number | undefined {
  if (!term) {
    return;
  }
  const parsed = Number.parseInt(term.value, radix);
  // `parseInt` returns `NaN` for unparsable input, which would still pass `typeof x === 'number'` checks
  return Number.isNaN(parsed) ? undefined : parsed;
}
/**
* Represents a triple pattern to be used as a filter.
*/

View File

@ -3,12 +3,19 @@ import { Transform } from 'stream';
import { RangeNotSatisfiedHttpError } from './errors/RangeNotSatisfiedHttpError';
import { pipeSafely } from './StreamUtil';
/**
 * Options accepted by the `SliceStream` constructor, in addition to the standard `TransformOptions`.
 */
export interface SliceStreamOptions extends TransformOptions {
/**
 * Position at which the slice starts (inclusive).
 * A negative value is resolved against `size` and is rejected if `size` is not defined.
 */
start: number;
/**
 * Position at which the slice ends (inclusive).
 * If not defined, the slice runs until the end of the stream.
 */
end?: number;
/**
 * Total size of the source stream, if known.
 * Required for negative `start` values. If defined, `end` has to be less than this value.
 */
size?: number;
}
/**
* A stream that slices a part out of another stream.
* `start` and `end` are inclusive.
* If `end` is not defined it is until the end of the stream.
* Does not support negative `start` values which would indicate slicing the end of the stream off,
* since we don't know the length of the input stream.
*
* A negative `start` value instead selects that many elements (bytes, or objects in object mode),
* counted from the end of the stream.
* This requires the `size` field to be defined.
*
* Both object and non-object streams are supported.
* This needs to be explicitly specified,
@ -19,16 +26,29 @@ export class SliceStream extends Transform {
protected remainingSkip: number;
protected remainingRead: number;
public constructor(source: Readable, options: TransformOptions & { start: number; end?: number }) {
public constructor(source: Readable, options: SliceStreamOptions) {
super(options);
let start = options.start;
const end = options.end ?? Number.POSITIVE_INFINITY;
if (options.start < 0) {
throw new RangeNotSatisfiedHttpError('Slicing data at the end of a stream is not supported.');
if (typeof options.size !== 'number') {
throw new RangeNotSatisfiedHttpError('Slicing data at the end of a stream requires a known size.');
} else {
// `start` is a negative number here so need to add
start = options.size + start;
}
if (options.start >= end) {
}
if (start >= end) {
throw new RangeNotSatisfiedHttpError('Range start should be less than end.');
}
this.remainingSkip = options.start;
// Not using `end` variable as that could be infinity
if (typeof options.end === 'number' && typeof options.size === 'number' && options.end >= options.size) {
throw new RangeNotSatisfiedHttpError('Range end should be less than the total size.');
}
this.remainingSkip = start;
// End value is inclusive
this.remainingRead = end - options.start + 1;

View File

@ -733,20 +733,23 @@ describe.each(stores)('An LDP handler allowing all requests %s', (name, { storeC
let response = await fetch(resourceUrl, { headers: { range: 'bytes=0-5' }});
expect(response.status).toBe(206);
expect(response.headers.get('content-range')).toBe('bytes 0-5/*');
expect(response.headers.get('content-range')).toBe('bytes 0-5/10');
expect(response.headers.get('content-length')).toBe('6');
await expect(response.text()).resolves.toBe('012345');
response = await fetch(resourceUrl, { headers: { range: 'bytes=5-' }});
expect(response.status).toBe(206);
expect(response.headers.get('content-range')).toBe('bytes 5-*/*');
expect(response.headers.get('content-range')).toBe('bytes 5-9/10');
expect(response.headers.get('content-length')).toBe('5');
await expect(response.text()).resolves.toBe('56789');
response = await fetch(resourceUrl, { headers: { range: 'bytes=-4' }});
expect(response.status).toBe(206);
expect(response.headers.get('content-range')).toBe('bytes 6-9/10');
expect(response.headers.get('content-length')).toBe('4');
await expect(response.text()).resolves.toBe('6789');
response = await fetch(resourceUrl, { headers: { range: 'bytes=5-15' }});
expect(response.status).toBe(206);
expect(response.headers.get('content-range')).toBe('bytes 5-15/*');
await expect(response.text()).resolves.toBe('56789');
response = await fetch(resourceUrl, { headers: { range: 'bytes=-5' }});
expect(response.status).toBe(416);
});
});

View File

@ -2,7 +2,7 @@ import { createResponse } from 'node-mocks-http';
import { RangeMetadataWriter } from '../../../../../src/http/output/metadata/RangeMetadataWriter';
import { RepresentationMetadata } from '../../../../../src/http/representation/RepresentationMetadata';
import type { HttpResponse } from '../../../../../src/server/HttpResponse';
import { SOLID_HTTP } from '../../../../../src/util/Vocabularies';
import { POSIX, SOLID_HTTP } from '../../../../../src/util/Vocabularies';
describe('RangeMetadataWriter', (): void => {
let metadata: RepresentationMetadata;
@ -15,17 +15,19 @@ describe('RangeMetadataWriter', (): void => {
writer = new RangeMetadataWriter();
});
it('adds the content-range header.', async(): Promise<void> => {
it('adds the content-range and content-length header.', async(): Promise<void> => {
metadata.set(SOLID_HTTP.terms.unit, 'bytes');
metadata.set(SOLID_HTTP.terms.start, '1');
metadata.set(SOLID_HTTP.terms.end, '5');
metadata.set(POSIX.terms.size, '10');
await expect(writer.handle({ response, metadata })).resolves.toBeUndefined();
expect(response.getHeaders()).toEqual({
'content-range': 'bytes 1-5/*',
'content-range': 'bytes 1-5/10',
'content-length': '5',
});
});
it('uses * if the value is unknown.', async(): Promise<void> => {
it('uses * if a value is unknown.', async(): Promise<void> => {
metadata.set(SOLID_HTTP.terms.unit, 'bytes');
await expect(writer.handle({ response, metadata })).resolves.toBeUndefined();
expect(response.getHeaders()).toEqual({
@ -37,4 +39,34 @@ describe('RangeMetadataWriter', (): void => {
await expect(writer.handle({ response, metadata })).resolves.toBeUndefined();
expect(response.getHeaders()).toEqual({ });
});
it('adds a content-length header if the size is known.', async(): Promise<void> => {
metadata.set(POSIX.terms.size, '10');
await expect(writer.handle({ response, metadata })).resolves.toBeUndefined();
expect(response.getHeaders()).toEqual({
'content-length': '10',
});
});
it('correctly deduces end values if the size is known.', async(): Promise<void> => {
metadata.set(SOLID_HTTP.terms.unit, 'bytes');
metadata.set(SOLID_HTTP.terms.start, '4');
metadata.set(POSIX.terms.size, '10');
await expect(writer.handle({ response, metadata })).resolves.toBeUndefined();
expect(response.getHeaders()).toEqual({
'content-range': 'bytes 4-9/10',
'content-length': '6',
});
});
it('correctly handles negative start values.', async(): Promise<void> => {
metadata.set(SOLID_HTTP.terms.unit, 'bytes');
metadata.set(SOLID_HTTP.terms.start, '-4');
metadata.set(POSIX.terms.size, '10');
await expect(writer.handle({ response, metadata })).resolves.toBeUndefined();
expect(response.getHeaders()).toEqual({
'content-range': 'bytes 6-9/10',
'content-length': '4',
});
});
});

View File

@ -5,7 +5,7 @@ import { ResourceStore } from '../../../src/storage/ResourceStore';
import { InternalServerError } from '../../../src/util/errors/InternalServerError';
import { RangeNotSatisfiedHttpError } from '../../../src/util/errors/RangeNotSatisfiedHttpError';
import { readableToString } from '../../../src/util/StreamUtil';
import { SOLID_HTTP } from '../../../src/util/Vocabularies';
import { POSIX, SOLID_HTTP } from '../../../src/util/Vocabularies';
describe('A BinarySliceResourceStore', (): void => {
const identifier = { path: 'path' };
@ -31,6 +31,14 @@ describe('A BinarySliceResourceStore', (): void => {
expect(result.metadata.get(SOLID_HTTP.terms.end)?.value).toBe('4');
});
it('uses the stream size when slicing if available.', async(): Promise<void> => {
representation.metadata.set(POSIX.terms.size, '10');
const result = await store.getRepresentation(identifier, { range: { unit: 'bytes', parts: [{ start: -4 }]}});
await expect(readableToString(result.data)).resolves.toBe('6789');
expect(result.metadata.get(SOLID_HTTP.terms.unit)?.value).toBe('bytes');
expect(result.metadata.get(SOLID_HTTP.terms.start)?.value).toBe('-4');
});
it('does not add end metadata if there is none.', async(): Promise<void> => {
const result = await store.getRepresentation(identifier, { range: { unit: 'bytes', parts: [{ start: 5 }]}});
await expect(readableToString(result.data)).resolves.toBe('56789');

View File

@ -9,7 +9,7 @@ import { NotFoundHttpError } from '../../../../src/util/errors/NotFoundHttpError
import type { Guarded } from '../../../../src/util/GuardedStream';
import { BaseIdentifierStrategy } from '../../../../src/util/identifiers/BaseIdentifierStrategy';
import { guardedStreamFrom, readableToString } from '../../../../src/util/StreamUtil';
import { LDP, RDF } from '../../../../src/util/Vocabularies';
import { CONTENT_TYPE, LDP, POSIX, RDF } from '../../../../src/util/Vocabularies';
const { namedNode } = DataFactory;
class DummyStrategy extends BaseIdentifierStrategy {
@ -104,13 +104,18 @@ describe('An InMemoryDataAccessor', (): void => {
it('adds stored metadata when requesting document metadata.', async(): Promise<void> => {
const identifier = { path: `${base}resource` };
const inputMetadata = new RepresentationMetadata(identifier, { [RDF.type]: LDP.terms.Resource });
const inputMetadata = new RepresentationMetadata(identifier, {
[RDF.type]: LDP.terms.Resource,
[CONTENT_TYPE]: 'text/turtle',
});
await expect(accessor.writeDocument(identifier, data, inputMetadata)).resolves.toBeUndefined();
metadata = await accessor.getMetadata(identifier);
expect(metadata.identifier.value).toBe(`${base}resource`);
const quads = metadata.quads();
expect(quads).toHaveLength(1);
expect(quads[0].object.value).toBe(LDP.Resource);
expect(quads).toHaveLength(3);
expect(metadata.get(RDF.terms.type)).toEqual(LDP.terms.Resource);
expect(metadata.contentType).toBe('text/turtle');
expect(metadata.get(POSIX.terms.size)?.value).toBe('4');
});
it('adds stored metadata when requesting container metadata.', async(): Promise<void> => {

View File

@ -8,7 +8,7 @@ import { BaseTypedRepresentationConverter } from '../../../../src/storage/conver
import { ChainedConverter } from '../../../../src/storage/conversion/ChainedConverter';
import { matchesMediaType } from '../../../../src/storage/conversion/ConversionUtil';
import type { RepresentationConverterArgs } from '../../../../src/storage/conversion/RepresentationConverter';
import { CONTENT_TYPE } from '../../../../src/util/Vocabularies';
import { CONTENT_TYPE, POSIX } from '../../../../src/util/Vocabularies';
class DummyConverter extends BaseTypedRepresentationConverter {
private readonly inTypes: ValuePreferences;
@ -47,6 +47,7 @@ describe('A ChainedConverter', (): void => {
beforeEach(async(): Promise<void> => {
const metadata = new RepresentationMetadata('a/a');
metadata.set(POSIX.terms.size, '500');
representation = { metadata } as Representation;
preferences = { type: { 'x/x': 1, 'x/*': 0.8 }};
args = { representation, preferences, identifier: { path: 'path' }};
@ -81,6 +82,7 @@ describe('A ChainedConverter', (): void => {
const result = await converter.handle(args);
expect(result.metadata.contentType).toBe('b/b');
expect(result.metadata.get(POSIX.terms.size)?.value).toBe('500');
});
it('converts input matching the output preferences if a better output can be found.', async(): Promise<void> => {
@ -91,6 +93,7 @@ describe('A ChainedConverter', (): void => {
const result = await converter.handle(args);
expect(result.metadata.contentType).toBe('x/x');
expect(result.metadata.get(POSIX.terms.size)).toBeUndefined();
});
it('interprets no preferences as */*.', async(): Promise<void> => {
@ -101,10 +104,12 @@ describe('A ChainedConverter', (): void => {
let result = await converter.handle(args);
expect(result.metadata.contentType).toBe('b/b');
expect(result.metadata.get(POSIX.terms.size)?.value).toBe('500');
args.preferences.type = { };
result = await converter.handle(args);
expect(result.metadata.contentType).toBe('b/b');
expect(result.metadata.get(POSIX.terms.size)?.value).toBe('500');
});
it('can find paths of length 1.', async(): Promise<void> => {
@ -113,6 +118,7 @@ describe('A ChainedConverter', (): void => {
const result = await converter.handle(args);
expect(result.metadata.contentType).toBe('x/x');
expect(result.metadata.get(POSIX.terms.size)).toBeUndefined();
});
it('can find longer paths.', async(): Promise<void> => {
@ -126,6 +132,7 @@ describe('A ChainedConverter', (): void => {
const result = await converter.handle(args);
expect(result.metadata.contentType).toBe('x/x');
expect(result.metadata.get(POSIX.terms.size)).toBeUndefined();
});
it('will use the shortest path among the best found.', async(): Promise<void> => {
@ -147,6 +154,7 @@ describe('A ChainedConverter', (): void => {
}
const result = await converter.handle(args);
expect(result.metadata.contentType).toBe('x/x');
expect(result.metadata.get(POSIX.terms.size)).toBeUndefined();
expect(converters[0].handle).toHaveBeenCalledTimes(0);
expect(converters[1].handle).toHaveBeenCalledTimes(0);
expect(converters[2].handle).toHaveBeenCalledTimes(1);

View File

@ -1,6 +1,6 @@
import 'jest-rdf';
import { DataFactory } from 'n3';
import { parseQuads, serializeQuads, uniqueQuads } from '../../../src/util/QuadUtil';
import { parseQuads, serializeQuads, termToInt, uniqueQuads } from '../../../src/util/QuadUtil';
import { guardedStreamFrom, readableToString } from '../../../src/util/StreamUtil';
const { literal, namedNode, quad } = DataFactory;
@ -50,4 +50,15 @@ describe('QuadUtil', (): void => {
]);
});
});
describe('#termToInt', (): void => {
it('returns undefined if the input is undefined.', async(): Promise<void> => {
expect(termToInt()).toBeUndefined();
});
it('converts the term to a number.', async(): Promise<void> => {
expect(termToInt(namedNode('5'))).toBe(5);
expect(termToInt(namedNode('0xF'), 16)).toBe(15);
});
});
});

View File

@ -4,7 +4,7 @@ import { SliceStream } from '../../../src/util/SliceStream';
import { readableToString } from '../../../src/util/StreamUtil';
describe('A SliceStream', (): void => {
it('does not support suffix slicing.', async(): Promise<void> => {
it('does not support suffix slicing if the size is unknown.', async(): Promise<void> => {
expect((): unknown => new SliceStream(Readable.from('0123456789'), { start: -5 }))
.toThrow(RangeNotSatisfiedHttpError);
});
@ -16,6 +16,11 @@ describe('A SliceStream', (): void => {
.toThrow(RangeNotSatisfiedHttpError);
});
it('requires the end to be less than the size.', async(): Promise<void> => {
expect((): unknown => new SliceStream(Readable.from('0123456789'), { start: 5, end: 6, size: 6 }))
.toThrow(RangeNotSatisfiedHttpError);
});
it('can slice binary streams.', async(): Promise<void> => {
await expect(readableToString(new SliceStream(Readable.from('0123456789', { objectMode: false }),
{ start: 3, end: 7, objectMode: false }))).resolves.toBe('34567');
@ -25,6 +30,9 @@ describe('A SliceStream', (): void => {
await expect(readableToString(new SliceStream(Readable.from('0123456789', { objectMode: false }),
{ start: 3, end: 20, objectMode: false }))).resolves.toBe('3456789');
await expect(readableToString(new SliceStream(Readable.from('0123456789', { objectMode: false }),
{ start: -3, size: 10, objectMode: false }))).resolves.toBe('789');
});
it('can slice object streams.', async(): Promise<void> => {
@ -37,5 +45,8 @@ describe('A SliceStream', (): void => {
await expect(readableToString(new SliceStream(Readable.from(arr, { objectMode: true }),
{ start: 3, end: 20, objectMode: true }))).resolves.toBe('3456789');
await expect(readableToString(new SliceStream(Readable.from(arr, { objectMode: true }),
{ start: -3, size: 10, objectMode: true }))).resolves.toBe('789');
});
});