feat: Use new MaxKeyLengthStorage to prevent keys that are too long

This commit is contained in:
Joachim Van Herwegen 2023-10-10 14:57:54 +02:00
parent e1c5189cb8
commit b5a61cbb08
7 changed files with 195 additions and 47 deletions

View File

@ -5,7 +5,10 @@
],
"@graph": [
{
"comment": "A storage that writes directly to a low-level store. This is necessary to prevent infinite loops with stores that also use storage.",
"comment": [
"A storage that writes directly to a low-level store. This is necessary to prevent infinite loops with stores that also use storage.",
"Currently only used for storing locks, where keys are always hashed, which is why this is not wrapped by a MaxKeyLengthStorage for now."
],
"@id": "urn:solid-server:default:BackendKeyValueStorage",
"@type": "JsonResourceStorage",
"source": { "@id": "urn:solid-server:default:ResourceStore_Backend" },
@ -15,10 +18,14 @@
{
"comment": "Internal value storage.",
"@id": "urn:solid-server:default:KeyValueStorage",
"@type": "JsonResourceStorage",
"source": { "@id": "urn:solid-server:default:ResourceStore" },
"baseUrl": { "@id": "urn:solid-server:default:variable:baseUrl" },
"container": "/.internal/"
"@type": "MaxKeyLengthStorage",
"source": {
"@id": "urn:solid-server:default:JsonResourceStorage",
"@type": "JsonResourceStorage",
"source": { "@id": "urn:solid-server:default:ResourceStore" },
"baseUrl": { "@id": "urn:solid-server:default:variable:baseUrl" },
"container": "/.internal/"
}
},
{
"comment": "Block external access to the storage containers to avoid exposing internal data.",

View File

@ -473,6 +473,7 @@ export * from './storage/keyvalue/IndexedStorage';
export * from './storage/keyvalue/JsonFileStorage';
export * from './storage/keyvalue/JsonResourceStorage';
export * from './storage/keyvalue/KeyValueStorage';
export * from './storage/keyvalue/MaxKeyLengthStorage';
export * from './storage/keyvalue/MemoryMapStorage';
export * from './storage/keyvalue/PassthroughKeyValueStorage';
export * from './storage/keyvalue/WrappedExpiringStorage';

View File

@ -1,19 +1,14 @@
import { createHash } from 'crypto';
import { parse } from 'path';
import { BasicRepresentation } from '../../http/representation/BasicRepresentation';
import type { Representation } from '../../http/representation/Representation';
import type { ResourceIdentifier } from '../../http/representation/ResourceIdentifier';
import { getLoggerFor } from '../../logging/LogUtil';
import { NotFoundHttpError } from '../../util/errors/NotFoundHttpError';
import { ensureLeadingSlash, ensureTrailingSlash, isContainerIdentifier, joinUrl,
joinFilePath } from '../../util/PathUtil';
import { ensureTrailingSlash, isContainerIdentifier, joinUrl, trimLeadingSlashes } from '../../util/PathUtil';
import { readableToString } from '../../util/StreamUtil';
import { LDP } from '../../util/Vocabularies';
import type { ResourceStore } from '../ResourceStore';
import type { KeyValueStorage } from './KeyValueStorage';
// Maximum allowed length for the keys, longer keys will be hashed.
const KEY_LENGTH_LIMIT = 255;
/**
* A {@link KeyValueStorage} for JSON-like objects using a {@link ResourceStore} as backend.
*
@ -27,6 +22,8 @@ const KEY_LENGTH_LIMIT = 255;
* All non-404 errors will be re-thrown.
*/
export class JsonResourceStorage<T> implements KeyValueStorage<string, T> {
protected readonly logger = getLoggerFor(this);
protected readonly source: ResourceStore;
protected readonly container: string;
@ -120,15 +117,6 @@ export class JsonResourceStorage<T> implements KeyValueStorage<string, T> {
* Converts a key into an identifier for internal storage.
*/
protected keyToIdentifier(key: string): ResourceIdentifier {
// Parse the key as a file path
const parsedPath = parse(key);
// Hash long filenames to prevent issues with the underlying storage.
// E.g. a UNIX a file name cannot exceed 255 bytes.
// This is a temporary fix for https://github.com/CommunitySolidServer/CommunitySolidServer/issues/1013,
// until we have a solution for data migration.
if (parsedPath.base.length > KEY_LENGTH_LIMIT) {
key = joinFilePath(parsedPath.dir, this.applyHash(parsedPath.base));
}
return { path: joinUrl(this.container, key) };
}
@ -142,8 +130,4 @@ export class JsonResourceStorage<T> implements KeyValueStorage<string, T> {
// on the `entries` results matching a key that was sent before.
return ensureLeadingSlash(identifier.path.slice(this.container.length));
}
protected applyHash(key: string): string {
return createHash('sha256').update(key).digest('hex');
}
}

View File

@ -1,5 +1,6 @@
/**
* A simple storage solution that can be used for internal values that need to be stored.
* To prevent potential issues, keys should be urlencoded before calling the storage.
*/
export interface KeyValueStorage<TKey, TValue> {
/**

View File

@ -0,0 +1,92 @@
import { createHash } from 'crypto';
import { getLoggerFor } from '../../logging/LogUtil';
import { NotImplementedHttpError } from '../../util/errors/NotImplementedHttpError';
import type { KeyValueStorage } from './KeyValueStorage';
type Entry<T> = {
key: string;
payload: T;
};
/**
* A {@link KeyValueStorage} that hashes keys in case they would be longer than the set limit.
* Hashed keys are prefixed with a certain value to prevent issues with incoming keys that are already hashed.
* The default max length is 150 and the default prefix is `$hash$`.
*
* This class mostly exists to prevent issues when writing storage entries to disk.
* Keys that are too long would cause issues with the file name limit.
* For this reason, only the part after the last `/` in a key is hashed, to preserve the expected file structure.
*/
export class MaxKeyLengthStorage<T> implements KeyValueStorage<string, T> {
protected readonly logger = getLoggerFor(this);
protected readonly source: KeyValueStorage<string, Entry<T>>;
protected readonly maxKeyLength: number;
protected readonly hashPrefix: string;
public constructor(source: KeyValueStorage<string, Entry<T>>, maxKeyLength = 150, hashPrefix = '$hash$') {
this.source = source;
this.maxKeyLength = maxKeyLength;
this.hashPrefix = hashPrefix;
}
public async has(key: string): Promise<boolean> {
return this.source.has(this.getKey(key));
}
public async get(key: string): Promise<T | undefined> {
return (await this.source.get(this.getKey(key)))?.payload;
}
public async set(key: string, value: T): Promise<this> {
await this.source.set(this.getKeyWithCheck(key), this.wrapPayload(key, value));
return this;
}
public async delete(key: string): Promise<boolean> {
return this.source.delete(this.getKey(key));
}
public async* entries(): AsyncIterableIterator<[string, T]> {
for await (const [ , val ] of this.source.entries()) {
yield [ val.key, val.payload ];
}
}
protected wrapPayload(key: string, payload: T): Entry<T> {
return { key, payload };
}
/**
* Similar to `getKey` but checks to make sure the key does not already contain the prefix.
* Only necessary for `set` calls.
*/
protected getKeyWithCheck(key: string): string {
const parts = key.split('/');
// Prevent non-hashed keys with the prefix to prevent false hits
if (parts[parts.length - 1].startsWith(this.hashPrefix)) {
throw new NotImplementedHttpError(`Unable to store keys starting with ${this.hashPrefix}`);
}
return this.getKey(key, parts);
}
/**
* Hashes the last part of the key if it is too long.
* Otherwise, just returns the key.
*/
protected getKey(key: string, parts?: string[]): string {
if (key.length <= this.maxKeyLength) {
return key;
}
// Hash the key if it is too long
parts = parts ?? key.split('/');
const last = parts.length - 1;
parts[last] = `${this.hashPrefix}${createHash('sha256').update(parts[last]).digest('hex')}`;
const newKey = parts.join('/');
this.logger.debug(`Hashing key ${key} to ${newKey}`);
return newKey;
}
}

View File

@ -1,4 +1,3 @@
import { createHash } from 'crypto';
import { BasicRepresentation } from '../../../../src/http/representation/BasicRepresentation';
import type { Representation } from '../../../../src/http/representation/Representation';
import { RepresentationMetadata } from '../../../../src/http/representation/RepresentationMetadata';
@ -6,7 +5,7 @@ import type { ResourceIdentifier } from '../../../../src/http/representation/Res
import { JsonResourceStorage } from '../../../../src/storage/keyvalue/JsonResourceStorage';
import type { ResourceStore } from '../../../../src/storage/ResourceStore';
import { NotFoundHttpError } from '../../../../src/util/errors/NotFoundHttpError';
import { isContainerIdentifier, joinUrl } from '../../../../src/util/PathUtil';
import { isContainerIdentifier } from '../../../../src/util/PathUtil';
import { readableToString } from '../../../../src/util/StreamUtil';
import { LDP } from '../../../../src/util/Vocabularies';
@ -123,24 +122,4 @@ describe('A JsonResourceStorage', (): void => {
[ subPath, 'subDocument' ],
]);
});
it('converts keys that would result in too large filenames into an identifier that uses a hash.',
async(): Promise<void> => {
const longFileName = `${'sometext'.repeat(32)}.json`;
const b64LongFileName = Buffer.from(longFileName).toString('base64');
const longKey = `/container/${b64LongFileName}`;
const longKeyId = joinUrl(subContainerIdentifier, createHash('sha256').update(b64LongFileName).digest('hex'));
await storage.set(longKey, 'data');
// Check if a hash of the key has been used for the filename part of the key.
expect(data.has(longKeyId)).toBeTruthy();
data.clear();
// Check that normal keys stay unaffected
const normalKey = '/container/test';
const normalKeyId = joinUrl(containerIdentifier, normalKey);
await storage.set(normalKey, 'data');
expect(data.has(normalKeyId)).toBeTruthy();
});
});

View File

@ -0,0 +1,84 @@
import type { KeyValueStorage } from '../../../../src/storage/keyvalue/KeyValueStorage';
import { MaxKeyLengthStorage } from '../../../../src/storage/keyvalue/MaxKeyLengthStorage';
import { NotImplementedHttpError } from '../../../../src/util/errors/NotImplementedHttpError';
describe('A MaxKeyLengthStorage', (): void => {
const key = 'key';
const longKey = 'thisisaverylongkeythatismorecharactersthansupportedbythestoragewhichallowsus' +
'tocheckifthehashtriggeractuallyoccursinthecasesthatarenecessarybutnotintheothercasesasthatwouldcauseissues';
const hash = '10a298fdb8f50bf0ddef6aac982c54f5613c357f897ab954daee498c60c6cad2';
const hashedKey = `$hash$${hash}`;
const payload = 'data';
let source: jest.Mocked<KeyValueStorage<string, any>>;
let storage: MaxKeyLengthStorage<string>;
beforeEach(async(): Promise<void> => {
const entries = [
[ key, { key, payload }],
[ hashedKey, { key: longKey, payload }],
];
source = {
has: jest.fn().mockResolvedValue(false),
get: jest.fn(),
set: jest.fn(),
delete: jest.fn().mockResolvedValue(false),
entries: jest.fn(async function* (): AsyncIterableIterator<any> {
yield* entries;
}),
};
storage = new MaxKeyLengthStorage(source);
});
it('checks the source for existence.', async(): Promise<void> => {
await expect(storage.has(key)).resolves.toBe(false);
expect(source.has).toHaveBeenCalledTimes(1);
expect(source.has).toHaveBeenLastCalledWith(key);
await expect(storage.has(longKey)).resolves.toBe(false);
expect(source.has).toHaveBeenCalledTimes(2);
expect(source.has).toHaveBeenLastCalledWith(hashedKey);
});
it('checks the source for data.', async(): Promise<void> => {
await expect(storage.get(key)).resolves.toBeUndefined();
expect(source.get).toHaveBeenCalledTimes(1);
expect(source.get).toHaveBeenLastCalledWith(key);
await expect(storage.get(longKey)).resolves.toBeUndefined();
expect(source.get).toHaveBeenCalledTimes(2);
expect(source.get).toHaveBeenLastCalledWith(hashedKey);
});
it('wraps data before writing it to the source.', async(): Promise<void> => {
await expect(storage.set(key, payload)).resolves.toBe(storage);
expect(source.set).toHaveBeenCalledTimes(1);
expect(source.set).toHaveBeenLastCalledWith(key, { key, payload });
await expect(storage.set(longKey, payload)).resolves.toBe(storage);
expect(source.set).toHaveBeenCalledTimes(2);
expect(source.set).toHaveBeenLastCalledWith(hashedKey, { key: longKey, payload });
});
it('calls the source to delete entries.', async(): Promise<void> => {
await expect(storage.delete(key)).resolves.toBe(false);
expect(source.delete).toHaveBeenCalledTimes(1);
expect(source.delete).toHaveBeenLastCalledWith(key);
await expect(storage.delete(longKey)).resolves.toBe(false);
expect(source.delete).toHaveBeenCalledTimes(2);
expect(source.delete).toHaveBeenLastCalledWith(hashedKey);
});
it('returns the correct entries.', async(): Promise<void> => {
const entries = [];
for await (const entry of storage.entries()) {
entries.push(entry);
}
expect(entries).toEqual([
[ key, payload ],
[ longKey, payload ],
]);
});
it('errors trying to write with a key that has the hash prefix.', async(): Promise<void> => {
await expect(storage.set(`$hash$key`, payload)).rejects.toThrow(NotImplementedHttpError);
});
});