Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(detector-gcp): suppress tracing when requesting GCP metadata #2321

Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,14 @@
*/

import * as gcpMetadata from 'gcp-metadata';
import { diag } from '@opentelemetry/api';
import { diag, context } from '@opentelemetry/api';
import {
Detector,
DetectorSync,
ResourceDetectionConfig,
Resource,
ResourceAttributes,
} from '@opentelemetry/resources';
import { getEnv } from '@opentelemetry/core';
import { getEnv, suppressTracing } from '@opentelemetry/core';
import {
CLOUDPROVIDERVALUES_GCP,
SEMRESATTRS_CLOUD_ACCOUNT_ID,
Expand All @@ -41,7 +41,7 @@ import {
* Cloud Platform and return a {@link Resource} populated with metadata about
* the instance. Returns an empty Resource if detection fails.
*/
class GcpDetector implements Detector {
class GcpDetector implements DetectorSync {
/**
* Attempts to connect and obtain instance configuration data from the GCP metadata service.
* If the connection is successful it returns a promise containing a {@link Resource}
Expand All @@ -50,20 +50,32 @@ class GcpDetector implements Detector {
*
* @param config The resource detection config
*/
async detect(_config?: ResourceDetectionConfig): Promise<Resource> {
detect(_config?: ResourceDetectionConfig): Resource {
return new Resource({}, this._getAttributes());
}

/** Gets resource attributes using `gcp-metadata` */
private async _getAttributes(): Promise<ResourceAttributes> {
if (!(await gcpMetadata.isAvailable())) {
diag.debug('GcpDetector failed: GCP Metadata unavailable.');
return Resource.empty();
return {};
}

// These methods use `gcp-metadata` APIs which send requests to a local IP/domain
// Ref: https://github.com/googleapis/gcp-metadata/blob/d88841db90d7d390eefb0de02b736b41f6adddde/src/index.ts#L23
// The HTTP instrumentation sees the requests and starts spans for them. Internal tracing
// info does not belong to the instrumented service and shouldn't be sent.
// Ref: https://github.com/open-telemetry/opentelemetry-js-contrib/issues/2320
const [projectId, instanceId, zoneId, clusterName, hostname] =
await Promise.all([
this._getProjectId(),
this._getInstanceId(),
this._getZone(),
this._getClusterName(),
this._getHostname(),
]);
await context.with(suppressTracing(context.active()), () => {
return Promise.all([
this._getProjectId(),
this._getInstanceId(),
this._getZone(),
this._getClusterName(),
this._getHostname(),
]);
});

const attributes: ResourceAttributes = {};
attributes[SEMRESATTRS_CLOUD_ACCOUNT_ID] = projectId;
Expand All @@ -72,10 +84,11 @@ class GcpDetector implements Detector {
attributes[SEMRESATTRS_CLOUD_AVAILABILITY_ZONE] = zoneId;
attributes[SEMRESATTRS_CLOUD_PROVIDER] = CLOUDPROVIDERVALUES_GCP;

if (getEnv().KUBERNETES_SERVICE_HOST)
if (getEnv().KUBERNETES_SERVICE_HOST) {
this._addK8sAttributes(attributes, clusterName);
}

return new Resource(attributes);
return attributes;
}

/** Add resource attributes for K8s */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@
* limitations under the License.
*/

import * as http from 'http';
import * as assert from 'assert';

import {
BASE_PATH,
HEADER_NAME,
Expand All @@ -30,8 +33,8 @@ import {
assertK8sResource,
assertContainerResource,
assertEmptyResource,
runTestFixture,
} from '@opentelemetry/contrib-test-utils';
import { Resource } from '@opentelemetry/resources';

const HEADERS = {
[HEADER_NAME.toLowerCase()]: HEADER_VALUE,
Expand Down Expand Up @@ -85,7 +88,10 @@ describe('gcpDetector', () => {
const secondaryScope = nock(SECONDARY_HOST_ADDRESS)
.get(INSTANCE_PATH)
.reply(200, {}, HEADERS);
const resource: Resource = await gcpDetector.detect();

const resource = gcpDetector.detect();
await resource.waitForAsyncAttributes?.();

secondaryScope.done();
scope.done();

Expand Down Expand Up @@ -121,7 +127,10 @@ describe('gcpDetector', () => {
const secondaryScope = nock(SECONDARY_HOST_ADDRESS)
.get(INSTANCE_PATH)
.reply(200, {}, HEADERS);
const resource = await gcpDetector.detect();

const resource = gcpDetector.detect();
await resource.waitForAsyncAttributes?.();

secondaryScope.done();
scope.done();

Expand Down Expand Up @@ -155,7 +164,10 @@ describe('gcpDetector', () => {
const secondaryScope = nock(SECONDARY_HOST_ADDRESS)
.get(INSTANCE_PATH)
.reply(200, {}, HEADERS);
const resource = await gcpDetector.detect();

const resource = gcpDetector.detect();
await resource.waitForAsyncAttributes?.();

secondaryScope.done();
scope.done();

Expand All @@ -167,8 +179,77 @@ describe('gcpDetector', () => {
});

it('returns empty resource if not detected', async () => {
const resource = await gcpDetector.detect();
const resource = gcpDetector.detect();
await resource.waitForAsyncAttributes?.();

assertEmptyResource(resource);
});
});

describe('internal tracing', () => {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(I guess I forgot to hit "save" in my first review.)

Wow this was a fair amount of effort to write a test case for this.
I don't know how others would feel, but I wouldn't require similar test code be added for suppressTracing usage in other detectors.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yup it was a fair amount of work 😓. But I needed

  • the sdk running so I had to create the fixture
  • to avoid nock since real requests won't happen so we won't be really testing
  • gcp-metadata to get responses. I could let it fail but I found that the request time varies in a way that makes the test flaky.

There is another approach I can take, but first I wanted some feedback on this one. If we finally use this one I may be inclined to move the mock server to @opentelemetry/contrib-test-utils

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

About the 3rd point in my previous comment: I found that if the async attributes of the resource are resolved after calling sdk.shutdown() we lose some spans. Without knowing too much about the mechanics of the processors/exporters, I'd guess that these spans are not yet in the queue that is flushed during shutdown. I think this could be a potential issue for apps that have a short execution time (thinking of Lambda or similar).

Copy link
Contributor Author

@david-luna david-luna Jul 10, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@trentm the other approach seems to work and it's shorter 🎉 (although we're getting a resource with empty values). Changes are

  • remove the mock server
  • trigger the detection again and waitForAsyncAttributes
diff --git a/detectors/node/opentelemetry-resource-detector-gcp/test/fixtures/use-gcp-detector.js b/detectors/node/opentelemetry-resource-detector-gcp/test/fixtures/use-gcp-detector.js
index 0c71a081..f88d2ae4 100644
--- a/detectors/node/opentelemetry-resource-detector-gcp/test/fixtures/use-gcp-detector.js
+++ b/detectors/node/opentelemetry-resource-detector-gcp/test/fixtures/use-gcp-detector.js
@@ -78,6 +78,9 @@ server.listen(0, '127.0.0.1', async function () {
   });

   // flush any left spans
+  // NOTE: this adds extra requests but it's necessary to make sure
+  // spans have the resource and are queued in the exporter
+  await gcpDetector.detect().waitForAsyncAttributes();
   await sdk.shutdown();
   await new Promise(resolve => server.close(resolve));
 });

A couple of observations:

  • test becomes slower since the requests are made again at the end of the fixture code
  • this detector just falls back to empty string if the request fails but I'm not sure if this will work on other detectors

I'm keen to use this simplified way but it is possible we have to do the server mock for other detectors (I hope not). WDYT?

it('should not export traces related to GCP detection', async () => {
  // Canned `gcp-metadata` responses keyed by request path. Built once here
  // instead of on every incoming request.
  const responseMap: Record<string, string> = {
    [INSTANCE_PATH]: '{}',
    [INSTANCE_ID_PATH]: '4520031799277581759',
    [PROJECT_ID_PATH]: 'my-project-id',
    [ZONE_PATH]: 'project/zone/my-zone',
    [CLUSTER_NAME_PATH]: 'my-cluster',
    [HOSTNAME_PATH]: 'dev.my-project.local',
  };

  // Mock server for `gcp-metadata`
  const gcpServer = http.createServer((req, res) => {
    // Drain the request body so that the 'end' event fires.
    req.resume();
    req.on('end', () => {
      // Paths that are not in the map are answered with an empty body.
      const body = responseMap[req.url ?? ''] || '';
      res.writeHead(200, {
        ...HEADERS,
        'content-type': body === '{}' ? 'application/json' : 'text/plain',
      });
      res.end(body);
    });
  });

  // Listen on an ephemeral port and resolve with the port that was assigned.
  const port = await new Promise<number>((resolve, reject) => {
    gcpServer.once('error', reject);
    gcpServer.listen(0, '127.0.0.1', () => {
      const address = gcpServer.address();
      if (address === null || typeof address === 'string') {
        reject(new Error('mock GCP metadata server has no TCP address'));
        return;
      }
      resolve(address.port);
    });
  });

  try {
    await runTestFixture({
      cwd: __dirname,
      argv: ['../fixtures/use-gcp-detector.js'],
      env: {
        // We set up `gcp-metadata` to send its requests to our own server so we
        // can mock responses even if the detector is in a different process. It
        // also speeds up the test: leaving it undefined would make it request an
        // internal IP, and the time that request takes to fail is variable.
        // Ref: https://github.com/googleapis/gcp-metadata/blob/d88841db90d7d390eefb0de02b736b41f6adddde/README.md#environment-variables
        GCE_METADATA_HOST: `127.0.0.1:${port}`,
      },
      checkResult: err => {
        assert.ifError(err);
      },
      checkCollector(collector) {
        const httpScope = '@opentelemetry/instrumentation-http';
        const spans = collector.sortedSpans;
        const httpSpans = spans.filter(
          s => s.instrumentationScope.name === httpScope
        );
        const gcpSpans = httpSpans.filter(s => {
          return s.attributes.some(
            a =>
              a.key === 'http.url' &&
              a.value.stringValue?.includes('/computeMetadata/v1/')
          );
        });

        // SDK collects the 2 spans from the fixture
        assert.strictEqual(httpSpans.length, 2);
        // but no spans related to GCP detector
        assert.strictEqual(gcpSpans.length, 0);
      },
    });
  } finally {
    // Always stop the mock server, even when the fixture or an assertion
    // fails, and wait for it to close so no listening socket outlives the test.
    await new Promise<void>(resolve => gcpServer.close(() => resolve()));
  }
});
});
});
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: missing license header, and mixed 4- and 2-space indents in this file. Unfortunately the eslint setup doesn't lint .js files.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thanks! maybe we could also make eslint work in js (but not in this PR)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Definitely not this PR.

Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/*
 * Copyright The OpenTelemetry Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Test fixture: starts a test SDK with the HTTP instrumentation and the GCP
// resource detector, performs one HTTP round trip against a local server,
// then shuts the SDK down.

const { createTestNodeSdk } = require('@opentelemetry/contrib-test-utils');
const { HttpInstrumentation } = require('@opentelemetry/instrumentation-http');
const { gcpDetector } = require('../../build/src/index.js');

const sdk = createTestNodeSdk({
  serviceName: 'use-detector-gcp',
  instrumentations: [new HttpInstrumentation()],
  resourceDetectors: [gcpDetector],
});

sdk.start();

// NOTE(review): `http` is deliberately required after `sdk.start()`, as in the
// original ordering — presumably so the instrumentation is active before the
// module is loaded. Keep this order.
const http = require('http');

// Local server that answers every request with a plain-text 'pong'.
const server = http.createServer((req, res) => {
  console.log('incoming request: %s %s %s', req.method, req.url, req.headers);

  // Drain the request body so that the 'end' event fires.
  req.resume();
  req.on('end', function () {
    const body = 'pong';
    res.writeHead(200, {
      'content-type': 'text/plain',
      'content-length': body.length,
    });
    res.end(body);
  });
});

server.listen(0, '127.0.0.1', async function () {
  const port = server.address().port;

  // Make one client request to our own server and wait for the full response.
  await new Promise((resolve) => {
    const clientReq = http.request(
      `http://127.0.0.1:${port}/ping`,
      function (cres) {
        console.log(
          'client response: %s %s',
          cres.statusCode,
          cres.headers
        );
        const chunks = [];
        cres.on('data', function (chunk) {
          chunks.push(chunk);
        });
        cres.on('end', function () {
          const body = chunks.join('');
          console.log('client response body: %j', body);
          resolve();
        });
      }
    );
    clientReq.write('ping');
    clientReq.end();
  });

  // Flush any remaining spans, then stop the local server.
  await sdk.shutdown();
  await new Promise(resolve => server.close(resolve));
});
3 changes: 3 additions & 0 deletions packages/opentelemetry-test-utils/src/test-fixtures.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import {
} from '@opentelemetry/otlp-transformer';
import { NodeSDK, tracing } from '@opentelemetry/sdk-node';
import type { Instrumentation } from '@opentelemetry/instrumentation';
import { Detector, DetectorSync } from '@opentelemetry/resources';

/**
* A utility for scripts that will be run with `runTestFixture()` to create an
Expand All @@ -43,6 +44,7 @@ import type { Instrumentation } from '@opentelemetry/instrumentation';
export function createTestNodeSdk(opts: {
serviceName?: string;
instrumentations: (Instrumentation | Instrumentation[])[];
resourceDetectors: Array<Detector | DetectorSync>;
}) {
const spanProcessor = process.env.OTEL_EXPORTER_OTLP_ENDPOINT
? undefined
Expand All @@ -51,6 +53,7 @@ export function createTestNodeSdk(opts: {
serviceName: opts.serviceName || 'test-service',
spanProcessor,
instrumentations: opts.instrumentations,
resourceDetectors: opts.resourceDetectors || [],
});
return sdk;
}
Expand Down