feat: Decode URI in target extractor

This commit is contained in:
Joachim Van Herwegen
2020-10-06 10:46:18 +02:00
parent b47dc3f7f6
commit bb28af937b
4 changed files with 35 additions and 3 deletions

View File

@@ -1,6 +1,7 @@
import type { TLSSocket } from 'tls';
import { format } from 'url';
import type { HttpRequest } from '../../server/HttpRequest';
import { toCanonicalUrl } from '../../util/Util';
import type { ResourceIdentifier } from '../representation/ResourceIdentifier';
import { TargetExtractor } from './TargetExtractor';
@@ -28,6 +29,6 @@ export class BasicTargetExtractor extends TargetExtractor {
pathname: input.url,
});
return { path: url };
return { path: toCanonicalUrl(url) };
}
}

View File

@@ -66,3 +66,16 @@ export const pipeStreamsAndErrors = <T extends Writable>(readable: Readable, des
readable.on('error', (error): boolean => destination.emit('error', new UnsupportedHttpError(error.message)));
return destination;
};
/**
 * Converts a URL string to the "canonical" version that should be used internally for consistency.
 * Decodes all percent encodings in every path segment
 * and then makes sure only the necessary characters are encoded again.
 *
 * Note that re-encoding is done with `encodeURI`, which leaves reserved characters
 * such as `?`, `#` and `/` untouched: a percent-encoded reserved character in the input
 * will therefore appear as a literal character in the result.
 *
 * @param url - Absolute URL of the form `scheme://authority/path`.
 *
 * @returns The URL with its path part decoded and minimally re-encoded.
 *
 * @throws UnsupportedHttpError
 * If the input does not contain a `scheme://authority/` prefix
 * or if its path contains a malformed percent-encoding.
 */
export const toCanonicalUrl = (url: string): string => {
  const match = /(\w+:\/\/[^/]+\/)(.*)/u.exec(url);
  if (!match) {
    throw new UnsupportedHttpError(`Invalid URL ${url}`);
  }
  const [ , domain, path ] = match;
  try {
    // Decode per segment so the split on '/' happens before any encoded slash is decoded.
    const decodedPath = path
      .split('/')
      .map((segment): string => decodeURIComponent(segment))
      .join('/');
    return encodeURI(domain + decodedPath);
  } catch {
    // `decodeURIComponent`/`encodeURI` throw a URIError on malformed input (e.g. a lone '%').
    // That is a problem with the request, so report it the same way as any other invalid URL.
    throw new UnsupportedHttpError(`Invalid URL ${url}`);
  }
};