diff --git a/experimental/CHANGELOG.md b/experimental/CHANGELOG.md index 7935a1df7f3..de1e3ff894b 100644 --- a/experimental/CHANGELOG.md +++ b/experimental/CHANGELOG.md @@ -46,6 +46,9 @@ All notable changes to experimental packages in this project will be documented ### :bug: (Bug Fix) +* fix(instrumentation-http): drop url.parse in favor of URL constructor [#5091](https://github.com/open-telemetry/opentelemetry-js/pull/5091) @pichlermarc + * fixes a bug where using cyrillic characters in a client request string URL would throw an exception, whereas an un-instrumented client would accept the same input without throwing an exception + ### :books: (Refine Doc) ### :house: (Internal) diff --git a/experimental/packages/opentelemetry-instrumentation-http/src/http.ts b/experimental/packages/opentelemetry-instrumentation-http/src/http.ts index eec43940352..f7672440cfc 100644 --- a/experimental/packages/opentelemetry-instrumentation-http/src/http.ts +++ b/experimental/packages/opentelemetry-instrumentation-http/src/http.ts @@ -632,15 +632,19 @@ export class HttpInstrumentation extends InstrumentationBase original.apply(this, [optionsParsed, ...args]), + () => { + if (invalidUrl) { + // we know that the url is invalid, there's no point in injecting context as it will fail validation. + // Passing in what the user provided will give the user an error that matches what they'd see without + // the instrumentation. + return original.apply(this, [options, ...args]); + } else { + return original.apply(this, [optionsParsed, ...args]); + } + }, error => { if (error) { setSpanWithError(span, error, instrumentation._semconvStability); diff --git a/experimental/packages/opentelemetry-instrumentation-http/src/utils.ts b/experimental/packages/opentelemetry-instrumentation-http/src/utils.ts index 97e7b28e5c6..e7a60aa054f 100644 --- a/experimental/packages/opentelemetry-instrumentation-http/src/utils.ts +++ b/experimental/packages/opentelemetry-instrumentation-http/src/utils.ts @@ -19,6 +19,7 @@ import { Span, context, SpanKind, + DiagLogger, } from '@opentelemetry/api'; import { ATTR_CLIENT_ADDRESS, @@ -235,27 +236,104 @@ export const isCompressed = ( return !!encoding && encoding !== 'identity'; }; +/** + * Mimics Node.js conversion of URL strings to RequestOptions expected by + * `http.request` and `https.request` APIs. + * + * See https://github.com/nodejs/node/blob/2505e217bba05fc581b572c685c5cf280a16c5a3/lib/internal/url.js#L1415-L1437 + * + * @param stringUrl + * @throws TypeError if the URL is not valid. + */ +function stringUrlToHttpOptions( + stringUrl: string +): RequestOptions & { pathname: string } { + // This is heavily inspired by Node.js handling of the same situation, trying + // to follow it as closely as possible while keeping in mind that we only + // deal with string URLs, not URL objects. + const { + hostname, + pathname, + port, + username, + password, + search, + protocol, + hash, + href, + origin, + host, + } = new URL(stringUrl); + + const options: RequestOptions & { + pathname: string; + hash: string; + search: string; + href: string; + origin: string; + } = { + protocol: protocol, + hostname: + hostname && hostname[0] === '[' ? hostname.slice(1, -1) : hostname, + hash: hash, + search: search, + pathname: pathname, + path: `${pathname || ''}${search || ''}`, + href: href, + origin: origin, + host: host, + }; + if (port !== '') { + options.port = Number(port); + } + if (username || password) { + options.auth = `${decodeURIComponent(username)}:${decodeURIComponent( + password + )}`; + } + return options; +} + /** * Makes sure options is an url object * return an object with default value and parsed options + * @param logger component logger * @param options original options for the request * @param [extraOptions] additional options for the request */ export const getRequestInfo = ( + logger: DiagLogger, options: url.URL | RequestOptions | string, extraOptions?: RequestOptions ): { origin: string; pathname: string; method: string; + invalidUrl: boolean; optionsParsed: RequestOptions; } => { - let pathname = '/'; - let origin = ''; + let pathname: string; + let origin: string; let optionsParsed: RequestOptions; + let invalidUrl = false; if (typeof options === 'string') { - optionsParsed = url.parse(options); - pathname = (optionsParsed as url.UrlWithStringQuery).pathname || '/'; + try { + const convertedOptions = stringUrlToHttpOptions(options); + optionsParsed = convertedOptions; + pathname = convertedOptions.pathname || '/'; + } catch (e) { + invalidUrl = true; + logger.verbose( + 'Unable to parse URL provided to HTTP request, using fallback to determine path. Original error:', + e + ); + // for backward compatibility with how url.parse() behaved. + optionsParsed = { + path: options, + }; + pathname = optionsParsed.path || '/'; + } + origin = `${optionsParsed.protocol || 'http:'}//${optionsParsed.host}`; if (extraOptions !== undefined) { Object.assign(optionsParsed, extraOptions); @@ -285,16 +363,23 @@ export const getRequestInfo = ( { protocol: options.host ? 'http:' : undefined }, options ); - pathname = (options as url.URL).pathname; - if (!pathname && optionsParsed.path) { - pathname = url.parse(optionsParsed.path).pathname || '/'; - } + const hostname = optionsParsed.host || (optionsParsed.port != null ? `${optionsParsed.hostname}${optionsParsed.port}` : optionsParsed.hostname); origin = `${optionsParsed.protocol || 'http:'}//${hostname}`; + + pathname = (options as url.URL).pathname; + if (!pathname && optionsParsed.path) { + try { + const parsedUrl = new URL(optionsParsed.path, origin); + pathname = parsedUrl.pathname || '/'; + } catch (e) { + pathname = '/'; + } + } } // some packages return method in lowercase.. @@ -303,7 +388,7 @@ export const getRequestInfo = ( ? optionsParsed.method.toUpperCase() : 'GET'; - return { origin, pathname, method, optionsParsed }; + return { origin, pathname, method, optionsParsed, invalidUrl }; }; /** @@ -657,6 +742,42 @@ export function getRemoteClientAddress( return null; } +function getInfoFromIncomingMessage( + component: 'http' | 'https', + request: IncomingMessage, + logger: DiagLogger +): { pathname?: string; search?: string; toString: () => string } { + try { + if (request.headers.host) { + return new URL( + request.url ?? '/', + `${component}://${request.headers.host}` + ); + } else { + const unsafeParsedUrl = new URL( + request.url ?? '/', + // using localhost as a workaround to still use the URL constructor for parsing + `${component}://localhost` + ); + // since we use localhost as a workaround, ensure we hide the rest of the properties to avoid + // our workaround leaking though. + return { + pathname: unsafeParsedUrl.pathname, + search: unsafeParsedUrl.search, + toString: function () { + // we cannot use the result of unsafeParsedUrl.toString as it's potentially wrong. + return unsafeParsedUrl.pathname + unsafeParsedUrl.search; + }, + }; + } + } catch (e) { + // something is wrong, use undefined - this *should* never happen, logging + // for troubleshooting in case it does happen. + logger.verbose('Unable to get URL from request', e); + return {}; + } +} + /** * Returns incoming request attributes scoped to the request data * @param {IncomingMessage} request the request object @@ -670,18 +791,15 @@ export const getIncomingRequestAttributes = ( serverName?: string; hookAttributes?: Attributes; semconvStability: SemconvStability; - } + }, + logger: DiagLogger ): Attributes => { const headers = request.headers; const userAgent = headers['user-agent']; const ips = headers['x-forwarded-for']; const httpVersion = request.httpVersion; - const requestUrl = request.url ? url.parse(request.url) : null; - const host = requestUrl?.host || headers.host; - const hostname = - requestUrl?.hostname || - host?.replace(/^(.*)(:[0-9]{1,5})/, '$1') || - 'localhost'; + const host = headers.host; + const hostname = host?.replace(/^(.*)(:[0-9]{1,5})/, '$1') || 'localhost'; const method = request.method; const normalizedMethod = normalizeMethod(method); @@ -701,8 +819,14 @@ export const getIncomingRequestAttributes = ( [ATTR_USER_AGENT_ORIGINAL]: userAgent, }; - if (requestUrl?.pathname != null) { - newAttributes[ATTR_URL_PATH] = requestUrl.pathname; + const parsedUrl = getInfoFromIncomingMessage( + options.component, + request, + logger + ); + + if (parsedUrl?.pathname != null) { + newAttributes[ATTR_URL_PATH] = parsedUrl.pathname; } if (remoteClientAddress != null) { @@ -719,11 +843,7 @@ export const getIncomingRequestAttributes = ( } const oldAttributes: Attributes = { - [SEMATTRS_HTTP_URL]: getAbsoluteUrl( - requestUrl, - headers, - `${options.component}:` - ), + [SEMATTRS_HTTP_URL]: parsedUrl.toString(), [SEMATTRS_HTTP_HOST]: host, [SEMATTRS_NET_HOST_NAME]: hostname, [SEMATTRS_HTTP_METHOD]: method, @@ -738,8 +858,9 @@ export const getIncomingRequestAttributes = ( oldAttributes[SEMATTRS_HTTP_SERVER_NAME] = serverName; } - if (requestUrl) { - oldAttributes[SEMATTRS_HTTP_TARGET] = requestUrl.path || '/'; + if (parsedUrl?.pathname) { + oldAttributes[SEMATTRS_HTTP_TARGET] = + parsedUrl?.pathname + parsedUrl?.search || '/'; } if (userAgent !== undefined) { diff --git a/experimental/packages/opentelemetry-instrumentation-http/test/functionals/http-enable.test.ts b/experimental/packages/opentelemetry-instrumentation-http/test/functionals/http-enable.test.ts index b7878efc0a9..0d515baf46d 100644 --- a/experimental/packages/opentelemetry-instrumentation-http/test/functionals/http-enable.test.ts +++ b/experimental/packages/opentelemetry-instrumentation-http/test/functionals/http-enable.test.ts @@ -350,6 +350,15 @@ describe('HttpInstrumentation', () => { assert.strictEqual(rpcData.route, undefined); rpcData.route = 'TheRoute'; } + if (request.url?.includes('/login')) { + assert.strictEqual( + request.headers.authorization, + 'Basic ' + Buffer.from('username:password').toString('base64') + ); + } + if (request.url?.includes('/withQuery')) { + assert.match(request.url, /withQuery\?foo=bar$/); + } response.end('Test Server Response'); }); @@ -597,7 +606,7 @@ describe('HttpInstrumentation', () => { assert.strictEqual(spans.length, 2); }); - for (const arg of ['string', {}, new Date()]) { + for (const arg of [{}, new Date()]) { it(`should be traceable and not throw exception in ${protocol} instrumentation when passing the following argument ${JSON.stringify( arg )}`, async () => { @@ -1005,6 +1014,34 @@ describe('HttpInstrumentation', () => { assert.deepStrictEqual(warnMessages, []); }); + + it('should not throw with cyrillic characters in the request path', async () => { + // see https://github.com/open-telemetry/opentelemetry-js/issues/5060 + await httpRequest.get(`${protocol}://${hostname}:${serverPort}/привет`); + }); + + it('should keep username and password in the request', async () => { + await httpRequest.get( + `${protocol}://username:password@${hostname}:${serverPort}/login` + ); + }); + + it('should keep query in the request', async () => { + await httpRequest.get( + `${protocol}://${hostname}:${serverPort}/withQuery?foo=bar` + ); + }); + + it('using an invalid url does throw from client but still creates a span', async () => { + try { + await httpRequest.get(`http://instrumentation.test:string-as-port/`); + } catch (e) { + assert.match(e.message, /Invalid URL/); + } + + const spans = memoryExporter.getFinishedSpans(); + assert.strictEqual(spans.length, 1); + }); }); describe('with semconv stability set to http', () => { diff --git a/experimental/packages/opentelemetry-instrumentation-http/test/functionals/https-enable.test.ts b/experimental/packages/opentelemetry-instrumentation-http/test/functionals/https-enable.test.ts index 7c2e68bb0f7..a9f3b506a60 100644 --- a/experimental/packages/opentelemetry-instrumentation-http/test/functionals/https-enable.test.ts +++ b/experimental/packages/opentelemetry-instrumentation-http/test/functionals/https-enable.test.ts @@ -435,7 +435,7 @@ describe('HttpsInstrumentation', () => { assert.strictEqual(spans.length, 0); }); - for (const arg of ['string', {}, new Date()]) { + for (const arg of [{}, new Date()]) { it(`should be traceable and not throw exception in ${protocol} instrumentation when passing the following argument ${JSON.stringify( arg )}`, async () => { @@ -672,6 +672,31 @@ describe('HttpsInstrumentation', () => { }); req.end(); }); + + it('should keep username and password in the request', async () => { + await httpsRequest.get( + `${protocol}://username:password@${hostname}:${serverPort}/login` + ); + }); + + it('should keep query in the request', async () => { + await httpsRequest.get( + `${protocol}://${hostname}:${serverPort}/withQuery?foo=bar` + ); + }); + + it('using an invalid url does throw from client but still creates a span', async () => { + try { + await httpsRequest.get( + `${protocol}://instrumentation.test:string-as-port/` + ); + } catch (e) { + assert.match(e.message, /Invalid URL/); + } + + const spans = memoryExporter.getFinishedSpans(); + assert.strictEqual(spans.length, 1); + }); }); describe('partially disable instrumentation', () => { diff --git a/experimental/packages/opentelemetry-instrumentation-http/test/functionals/utils.test.ts b/experimental/packages/opentelemetry-instrumentation-http/test/functionals/utils.test.ts index 80e48126fd3..4f41548c897 100644 --- a/experimental/packages/opentelemetry-instrumentation-http/test/functionals/utils.test.ts +++ b/experimental/packages/opentelemetry-instrumentation-http/test/functionals/utils.test.ts @@ -19,6 +19,7 @@ import { SpanKind, context, Span, + diag, } from '@opentelemetry/api'; import { SEMATTRS_HTTP_REQUEST_CONTENT_LENGTH, @@ -101,7 +102,7 @@ describe('Utility', () => { urlParsedWithUndefinedHostAndNullPort, whatWgUrl, ]) { - const result = utils.getRequestInfo(param); + const result = utils.getRequestInfo(diag, param); assert.strictEqual(result.optionsParsed.hostname, 'google.fr'); assert.strictEqual(result.optionsParsed.protocol, 'http:'); assert.strictEqual(result.optionsParsed.path, '/aPath?qu=ry'); @@ -432,10 +433,14 @@ describe('Utility', () => { 'user-agent': 'chrome', 'x-forwarded-for': ', , ', }; - const attributes = utils.getIncomingRequestAttributes(request, { - component: 'http', - semconvStability: SemconvStability.OLD, - }); + const attributes = utils.getIncomingRequestAttributes( + request, + { + component: 'http', + semconvStability: SemconvStability.OLD, + }, + diag + ); assert.strictEqual(attributes[SEMATTRS_HTTP_ROUTE], undefined); }); @@ -448,10 +453,14 @@ describe('Utility', () => { request.headers = { 'user-agent': 'chrome', }; - const attributes = utils.getIncomingRequestAttributes(request, { - component: 'http', - semconvStability: SemconvStability.OLD, - }); + const attributes = utils.getIncomingRequestAttributes( + request, + { + component: 'http', + semconvStability: SemconvStability.OLD, + }, + diag + ); assert.strictEqual(attributes[SEMATTRS_HTTP_TARGET], '/user/?q=val'); }); });