diff --git a/config/storage/key-value/resource-store.json b/config/storage/key-value/resource-store.json index 4821fd8ed..0d9db37b1 100644 --- a/config/storage/key-value/resource-store.json +++ b/config/storage/key-value/resource-store.json @@ -5,7 +5,10 @@ ], "@graph": [ { - "comment": "A storage that writes directly to a low-level store. This is necessary to prevent infinite loops with stores that also use storage.", + "comment": [ + "A storage that writes directly to a low-level store. This is necessary to prevent infinite loops with stores that also use storage.", + "Currently only used for storing locks, where keys are always hashed, which is why this is not wrapped by a MaxKeyLengthStorage for now." + ], "@id": "urn:solid-server:default:BackendKeyValueStorage", "@type": "JsonResourceStorage", "source": { "@id": "urn:solid-server:default:ResourceStore_Backend" }, @@ -15,10 +18,14 @@ { "comment": "Internal value storage.", "@id": "urn:solid-server:default:KeyValueStorage", - "@type": "JsonResourceStorage", - "source": { "@id": "urn:solid-server:default:ResourceStore" }, - "baseUrl": { "@id": "urn:solid-server:default:variable:baseUrl" }, - "container": "/.internal/" + "@type": "MaxKeyLengthStorage", + "source": { + "@id": "urn:solid-server:default:JsonResourceStorage", + "@type": "JsonResourceStorage", + "source": { "@id": "urn:solid-server:default:ResourceStore" }, + "baseUrl": { "@id": "urn:solid-server:default:variable:baseUrl" }, + "container": "/.internal/" + } }, { "comment": "Block external access to the storage containers to avoid exposing internal data.", diff --git a/src/index.ts b/src/index.ts index b3d4dd42c..53317d406 100644 --- a/src/index.ts +++ b/src/index.ts @@ -473,6 +473,7 @@ export * from './storage/keyvalue/IndexedStorage'; export * from './storage/keyvalue/JsonFileStorage'; export * from './storage/keyvalue/JsonResourceStorage'; export * from './storage/keyvalue/KeyValueStorage'; +export * from './storage/keyvalue/MaxKeyLengthStorage'; export * from './storage/keyvalue/MemoryMapStorage'; export * from './storage/keyvalue/PassthroughKeyValueStorage'; export * from './storage/keyvalue/WrappedExpiringStorage'; diff --git a/src/storage/keyvalue/JsonResourceStorage.ts b/src/storage/keyvalue/JsonResourceStorage.ts index 66be4af61..8682bfdee 100644 --- a/src/storage/keyvalue/JsonResourceStorage.ts +++ b/src/storage/keyvalue/JsonResourceStorage.ts @@ -1,19 +1,14 @@ -import { createHash } from 'crypto'; -import { parse } from 'path'; import { BasicRepresentation } from '../../http/representation/BasicRepresentation'; import type { Representation } from '../../http/representation/Representation'; import type { ResourceIdentifier } from '../../http/representation/ResourceIdentifier'; +import { getLoggerFor } from '../../logging/LogUtil'; import { NotFoundHttpError } from '../../util/errors/NotFoundHttpError'; -import { ensureLeadingSlash, ensureTrailingSlash, isContainerIdentifier, joinUrl, - joinFilePath } from '../../util/PathUtil'; +import { ensureTrailingSlash, isContainerIdentifier, joinUrl, trimLeadingSlashes } from '../../util/PathUtil'; import { readableToString } from '../../util/StreamUtil'; import { LDP } from '../../util/Vocabularies'; import type { ResourceStore } from '../ResourceStore'; import type { KeyValueStorage } from './KeyValueStorage'; -// Maximum allowed length for the keys, longer keys will be hashed. -const KEY_LENGTH_LIMIT = 255; - /** * A {@link KeyValueStorage} for JSON-like objects using a {@link ResourceStore} as backend. * @@ -27,6 +22,8 @@ const KEY_LENGTH_LIMIT = 255; * All non-404 errors will be re-thrown. */ export class JsonResourceStorage implements KeyValueStorage { + protected readonly logger = getLoggerFor(this); + protected readonly source: ResourceStore; protected readonly container: string; @@ -120,15 +117,6 @@ export class JsonResourceStorage implements KeyValueStorage { * Converts a key into an identifier for internal storage. */ protected keyToIdentifier(key: string): ResourceIdentifier { - // Parse the key as a file path - const parsedPath = parse(key); - // Hash long filenames to prevent issues with the underlying storage. - // E.g. a UNIX a file name cannot exceed 255 bytes. - // This is a temporary fix for https://github.com/CommunitySolidServer/CommunitySolidServer/issues/1013, - // until we have a solution for data migration. - if (parsedPath.base.length > KEY_LENGTH_LIMIT) { - key = joinFilePath(parsedPath.dir, this.applyHash(parsedPath.base)); - } return { path: joinUrl(this.container, key) }; } @@ -142,8 +130,4 @@ export class JsonResourceStorage implements KeyValueStorage { // on the `entries` results matching a key that was sent before. return ensureLeadingSlash(identifier.path.slice(this.container.length)); } - - protected applyHash(key: string): string { - return createHash('sha256').update(key).digest('hex'); - } } diff --git a/src/storage/keyvalue/KeyValueStorage.ts b/src/storage/keyvalue/KeyValueStorage.ts index 95c50725a..4b238274f 100644 --- a/src/storage/keyvalue/KeyValueStorage.ts +++ b/src/storage/keyvalue/KeyValueStorage.ts @@ -1,5 +1,6 @@ /** * A simple storage solution that can be used for internal values that need to be stored. + * To prevent potential issues, keys should be urlencoded before calling the storage. */ export interface KeyValueStorage { /** diff --git a/src/storage/keyvalue/MaxKeyLengthStorage.ts b/src/storage/keyvalue/MaxKeyLengthStorage.ts new file mode 100644 index 000000000..28d28520e --- /dev/null +++ b/src/storage/keyvalue/MaxKeyLengthStorage.ts @@ -0,0 +1,92 @@ +import { createHash } from 'crypto'; +import { getLoggerFor } from '../../logging/LogUtil'; +import { NotImplementedHttpError } from '../../util/errors/NotImplementedHttpError'; +import type { KeyValueStorage } from './KeyValueStorage'; + +type Entry = { + key: string; + payload: T; +}; + +/** + * A {@link KeyValueStorage} that hashes keys in case they would be longer than the set limit. + * Hashed keys are prefixed with a certain value to prevent issues with incoming keys that are already hashed. + * The default max length is 150 and the default prefix is `$hash$`. + * + * This class mostly exists to prevent issues when writing storage entries to disk. + * Keys that are too long would cause issues with the file name limit. + * For this reason, only the part after the last `/` in a key is hashed, to preserve the expected file structure. + */ +export class MaxKeyLengthStorage implements KeyValueStorage { + protected readonly logger = getLoggerFor(this); + + protected readonly source: KeyValueStorage>; + protected readonly maxKeyLength: number; + protected readonly hashPrefix: string; + + public constructor(source: KeyValueStorage>, maxKeyLength = 150, hashPrefix = '$hash$') { + this.source = source; + this.maxKeyLength = maxKeyLength; + this.hashPrefix = hashPrefix; + } + + public async has(key: string): Promise { + return this.source.has(this.getKey(key)); + } + + public async get(key: string): Promise { + return (await this.source.get(this.getKey(key)))?.payload; + } + + public async set(key: string, value: T): Promise { + await this.source.set(this.getKeyWithCheck(key), this.wrapPayload(key, value)); + return this; + } + + public async delete(key: string): Promise { + return this.source.delete(this.getKey(key)); + } + + public async* entries(): AsyncIterableIterator<[string, T]> { + for await (const [ , val ] of this.source.entries()) { + yield [ val.key, val.payload ]; + } + } + + protected wrapPayload(key: string, payload: T): Entry { + return { key, payload }; + } + + /** + * Similar to `getKey` but checks to make sure the key does not already contain the prefix. + * Only necessary for `set` calls. + */ + protected getKeyWithCheck(key: string): string { + const parts = key.split('/'); + + // Prevent non-hashed keys with the prefix to prevent false hits + if (parts[parts.length - 1].startsWith(this.hashPrefix)) { + throw new NotImplementedHttpError(`Unable to store keys starting with ${this.hashPrefix}`); + } + + return this.getKey(key, parts); + } + + /** + * Hashes the last part of the key if it is too long. + * Otherwise, just returns the key. + */ + protected getKey(key: string, parts?: string[]): string { + if (key.length <= this.maxKeyLength) { + return key; + } + + // Hash the key if it is too long + parts = parts ?? key.split('/'); + const last = parts.length - 1; + parts[last] = `${this.hashPrefix}${createHash('sha256').update(parts[last]).digest('hex')}`; + const newKey = parts.join('/'); + this.logger.debug(`Hashing key ${key} to ${newKey}`); + return newKey; + } +} diff --git a/test/unit/storage/keyvalue/JsonResourceStorage.test.ts b/test/unit/storage/keyvalue/JsonResourceStorage.test.ts index 41100a606..daa051cbb 100644 --- a/test/unit/storage/keyvalue/JsonResourceStorage.test.ts +++ b/test/unit/storage/keyvalue/JsonResourceStorage.test.ts @@ -1,4 +1,3 @@ -import { createHash } from 'crypto'; import { BasicRepresentation } from '../../../../src/http/representation/BasicRepresentation'; import type { Representation } from '../../../../src/http/representation/Representation'; import { RepresentationMetadata } from '../../../../src/http/representation/RepresentationMetadata'; @@ -6,7 +5,7 @@ import type { ResourceIdentifier } from '../../../../src/http/representation/Res import { JsonResourceStorage } from '../../../../src/storage/keyvalue/JsonResourceStorage'; import type { ResourceStore } from '../../../../src/storage/ResourceStore'; import { NotFoundHttpError } from '../../../../src/util/errors/NotFoundHttpError'; -import { isContainerIdentifier, joinUrl } from '../../../../src/util/PathUtil'; +import { isContainerIdentifier } from '../../../../src/util/PathUtil'; import { readableToString } from '../../../../src/util/StreamUtil'; import { LDP } from '../../../../src/util/Vocabularies'; @@ -123,24 +122,4 @@ describe('A JsonResourceStorage', (): void => { [ subPath, 'subDocument' ], ]); }); - - it('converts keys that would result in too large filenames into an identifier that uses a hash.', - async(): Promise => { - const longFileName = `${'sometext'.repeat(32)}.json`; - const b64LongFileName = Buffer.from(longFileName).toString('base64'); - const longKey = `/container/${b64LongFileName}`; - const longKeyId = joinUrl(subContainerIdentifier, createHash('sha256').update(b64LongFileName).digest('hex')); - - await storage.set(longKey, 'data'); - // Check if a hash of the key has been used for the filename part of the key. - expect(data.has(longKeyId)).toBeTruthy(); - - data.clear(); - - // Check that normal keys stay unaffected - const normalKey = '/container/test'; - const normalKeyId = joinUrl(containerIdentifier, normalKey); - await storage.set(normalKey, 'data'); - expect(data.has(normalKeyId)).toBeTruthy(); - }); }); diff --git a/test/unit/storage/keyvalue/MaxKeyLengthStorage.test.ts b/test/unit/storage/keyvalue/MaxKeyLengthStorage.test.ts new file mode 100644 index 000000000..d3dbd5d5a --- /dev/null +++ b/test/unit/storage/keyvalue/MaxKeyLengthStorage.test.ts @@ -0,0 +1,84 @@ +import type { KeyValueStorage } from '../../../../src/storage/keyvalue/KeyValueStorage'; +import { MaxKeyLengthStorage } from '../../../../src/storage/keyvalue/MaxKeyLengthStorage'; +import { NotImplementedHttpError } from '../../../../src/util/errors/NotImplementedHttpError'; + +describe('A MaxKeyLengthStorage', (): void => { + const key = 'key'; + const longKey = 'thisisaverylongkeythatismorecharactersthansupportedbythestoragewhichallowsus' + + 'tocheckifthehashtriggeractuallyoccursinthecasesthatarenecessarybutnotintheothercasesasthatwouldcauseissues'; + const hash = '10a298fdb8f50bf0ddef6aac982c54f5613c357f897ab954daee498c60c6cad2'; + const hashedKey = `$hash$${hash}`; + const payload = 'data'; + let source: jest.Mocked>; + let storage: MaxKeyLengthStorage; + + beforeEach(async(): Promise => { + const entries = [ + [ key, { key, payload }], + [ hashedKey, { key: longKey, payload }], + ]; + + source = { + has: jest.fn().mockResolvedValue(false), + get: jest.fn(), + set: jest.fn(), + delete: jest.fn().mockResolvedValue(false), + entries: jest.fn(async function* (): AsyncIterableIterator { + yield* entries; + }), + }; + + storage = new MaxKeyLengthStorage(source); + }); + + it('checks the source for existence.', async(): Promise => { + await expect(storage.has(key)).resolves.toBe(false); + expect(source.has).toHaveBeenCalledTimes(1); + expect(source.has).toHaveBeenLastCalledWith(key); + await expect(storage.has(longKey)).resolves.toBe(false); + expect(source.has).toHaveBeenCalledTimes(2); + expect(source.has).toHaveBeenLastCalledWith(hashedKey); + }); + + it('checks the source for data.', async(): Promise => { + await expect(storage.get(key)).resolves.toBeUndefined(); + expect(source.get).toHaveBeenCalledTimes(1); + expect(source.get).toHaveBeenLastCalledWith(key); + await expect(storage.get(longKey)).resolves.toBeUndefined(); + expect(source.get).toHaveBeenCalledTimes(2); + expect(source.get).toHaveBeenLastCalledWith(hashedKey); + }); + + it('wraps data before writing it to the source.', async(): Promise => { + await expect(storage.set(key, payload)).resolves.toBe(storage); + expect(source.set).toHaveBeenCalledTimes(1); + expect(source.set).toHaveBeenLastCalledWith(key, { key, payload }); + await expect(storage.set(longKey, payload)).resolves.toBe(storage); + expect(source.set).toHaveBeenCalledTimes(2); + expect(source.set).toHaveBeenLastCalledWith(hashedKey, { key: longKey, payload }); + }); + + it('calls the source to delete entries.', async(): Promise => { + await expect(storage.delete(key)).resolves.toBe(false); + expect(source.delete).toHaveBeenCalledTimes(1); + expect(source.delete).toHaveBeenLastCalledWith(key); + await expect(storage.delete(longKey)).resolves.toBe(false); + expect(source.delete).toHaveBeenCalledTimes(2); + expect(source.delete).toHaveBeenLastCalledWith(hashedKey); + }); + + it('returns the correct entries.', async(): Promise => { + const entries = []; + for await (const entry of storage.entries()) { + entries.push(entry); + } + expect(entries).toEqual([ + [ key, payload ], + [ longKey, payload ], + ]); + }); + + it('errors trying to write with a key that has the hash prefix.', async(): Promise => { + await expect(storage.set(`$hash$key`, payload)).rejects.toThrow(NotImplementedHttpError); + }); +});