Skip to content

Commit

Permalink
Merge pull request #367 from GSA/lc/1139-third-party-service-url
Browse files Browse the repository at this point in the history
Third Party Services URLs
  • Loading branch information
luke-at-flexion authored Sep 16, 2024
2 parents 53516fa + 68afd43 commit 63a061a
Show file tree
Hide file tree
Showing 9 changed files with 195 additions and 9 deletions.
12 changes: 12 additions & 0 deletions entities/core-result.entity.ts
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,18 @@ export class CoreResult {
})
thirdPartyServiceDomains?: string;

/**
 * Third-party service URLs recorded for a scan, stored in a nullable column as
 * a single comma-separated string.
 */
@Column({ nullable: true })
// Exposure under the `third_party_service_urls` name is currently commented out;
// the field is marked with @Exclude() instead.
//@Expose({ name: 'third_party_service_urls' })
@Exclude()
// Splits the stored comma-separated string into an array. A falsy value (empty
// or missing) yields the literal string 'null', not the null value.
// NOTE(review): a URL path may itself contain commas, so split(',') could break
// one URL into several entries — confirm this is acceptable for consumers.
@Transform((value: string) => {
if (value) {
return value.split(',');
} else {
return 'null';
}
})
thirdPartyServiceUrls?: string;

@Column({ nullable: true })
@Expose({ name: 'third_party_service_count' })
thirdPartyServiceCount?: number;
Expand Down
1 change: 1 addition & 0 deletions entities/scan-data.entity.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ export type SeoScan = {
/**
 * Result of the third-party scan for a single page.
 */
export type ThirdPartyScan = {
/** Comma-separated, sorted, de-duplicated list of third-party domains. */
thirdPartyServiceDomains: string;
/** Number of entries in `thirdPartyServiceDomains`. */
thirdPartyServiceCount: number;
/** Comma-separated, sorted, de-duplicated third-party request URLs with query strings removed. */
thirdPartyServiceUrls: string;
};

export type CookieScan = {
Expand Down
80 changes: 80 additions & 0 deletions libs/core-scanner/src/metric-utils.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
import pino from 'pino';
import { logCount, logTimer } from './metric-utils';


const mockLogger = pino();

/**
 * Unit tests for the metric logging helpers `logCount` and `logTimer`.
 *
 * `mockLogger` is created with `pino()`, so its `info` method is replaced with
 * a jest spy before every test; otherwise `toHaveBeenCalledWith` has nothing to
 * inspect. The spy is restored after every test so calls never leak between
 * test cases.
 */
describe('Metric Utilities', () => {
  beforeEach(() => {
    // Silence real log output and make `mockLogger.info` assertable.
    jest.spyOn(mockLogger, 'info').mockImplementation(() => undefined);
  });

  afterEach(() => {
    jest.restoreAllMocks();
  });

  describe('logCount()', () => {
    it('should call mockLogger.info with default metadata when no metadata is provided', () => {
      const metricId = 'test-id';
      const logMessage = 'test message';

      logCount(mockLogger, null, metricId, logMessage);

      expect(mockLogger.info).toHaveBeenCalledTimes(1);
      expect(mockLogger.info).toHaveBeenCalledWith(
        {
          metricValue: 1,
          metricUnit: 'count',
          metricId,
        },
        logMessage,
      );
    });

    it('should call mockLogger.info with provided metadata merged with defaults', () => {
      const metricId = 'metric2';
      const logMessage = 'Another log message';
      const metadata = {
        metricValue: 5,
        additionalInfo: 'Extra data',
      };

      logCount(mockLogger, metadata, metricId, logMessage);

      expect(mockLogger.info).toHaveBeenCalledTimes(1);
      expect(mockLogger.info).toHaveBeenCalledWith(
        {
          metricValue: 5,
          metricUnit: 'count',
          metricId,
          additionalInfo: 'Extra data',
        },
        logMessage,
      );
    });
  });

  describe('logTimer()', () => {
    let timer: ReturnType<typeof logTimer>;

    beforeEach(() => {
      timer = logTimer(mockLogger);
    });

    it('should create a timer with a start time', () => {
      expect(timer.start).toBeDefined();
      expect(timer.start).toBeGreaterThan(0);
    });

    it('should call mockLogger.info with correct metadata and message after log is called', () => {
      const metricId = 'timer1';
      // The placeholder must be present so the substitution is actually exercised.
      const logMessage = 'Duration: {metricValue} ms';
      const metadata = { additionalInfo: 'test' };

      timer.log(metadata, metricId, logMessage);

      expect(mockLogger.info).toHaveBeenCalledTimes(1);
      expect(mockLogger.info).toHaveBeenCalledWith(
        {
          ...metadata,
          // The elapsed time is not deterministic; only its type is pinned.
          metricValue: expect.any(Number),
          metricUnit: 'ms',
          metricId,
        },
        // {metricValue} must have been replaced by the numeric duration.
        expect.stringMatching(/^Duration: \d+ ms$/),
      );
    });
  });
});
41 changes: 41 additions & 0 deletions libs/core-scanner/src/metric-utils.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import { Logger } from 'pino';

/**
 * Logs a count metric at `info` level.
 *
 * The logged metadata starts from the defaults `metricValue: 1` and
 * `metricUnit: "count"`, is overlaid with the caller's `metadata`, and always
 * carries the given `metricId` (a `metricId` key inside `metadata` is overridden).
 * Every `{metricValue}` placeholder in `logMessage` is replaced with the final
 * metric value, matching the behaviour of `logTimer().log`.
 *
 * @param logger - Logger whose `info(metadata, message)` method receives the metric.
 * @param metadata - Extra fields to log; may be null/undefined for "none".
 * @param metricId - Identifier of the metric.
 * @param logMessage - Message template; may contain `{metricValue}`.
 */
export function logCount( logger: Logger, metadata: any, metricId: string, logMessage: string ): void {
  const finalMetadata = {
    metricValue: 1,
    metricUnit: "count",
    ...( metadata || {} ),
    metricId,
  };

  // A replacer function is used so that "$" sequences in the rendered value
  // are never interpreted as replacement patterns.
  const renderedValue = String( finalMetadata.metricValue );
  const finalMessage = logMessage.replace( /\{metricValue\}/g, () => renderedValue );

  logger.info( finalMetadata, finalMessage );
}

/**
 * Starts a timer and returns an object that can log the elapsed time.
 *
 * The returned object exposes:
 *  - `start`: the `Date.now()` timestamp taken when `logTimer` was called;
 *  - `log(metadata, metricId, logMessage, decimalPrecision)`: logs, at `info`
 *    level, the milliseconds elapsed since `start`. The caller's metadata is
 *    overlaid with `metricUnit: "ms"`, `metricValue` (the elapsed time) and
 *    `metricId`, and every `{metricValue}` placeholder in `logMessage` is
 *    replaced with the elapsed time.
 *
 * `decimalPrecision` is accepted but not currently used by the implementation.
 *
 * @param logger - Logger whose `info(metadata, message)` method receives the metric.
 * @returns The timer object described above.
 */
export function logTimer( logger: Logger ) {
  const timer = {
    start: Date.now(),
    log: logElapsed,
  };

  // Declared as a hoisted function so the object literal above stays compact.
  function logElapsed( metadata: any, metricId: string, logMessage: string, decimalPrecision = 0 ) {
    // Read `timer.start` at call time rather than capturing it up front.
    const elapsedMs = Date.now() - timer.start;
    const enriched = {
      ...( metadata || {} ),
      metricUnit: "ms",
      metricValue: elapsedMs,
      metricId,
    };
    const rendered = logMessage.replace( /\{metricValue\}/g, elapsedMs.toString() );
    logger.info( enriched, rendered );
  }

  return timer;
}
2 changes: 1 addition & 1 deletion libs/core-scanner/src/pages/primary.ts
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ const primaryScan = async (
mobileScan,
] = await promiseAll([
buildDapResult(logger, getOutboundRequests()),
buildThirdPartyResult(response, getOutboundRequests()),
buildThirdPartyResult(logger, response, getOutboundRequests()),
buildCookieResult(page),
buildSeoResult(logger, page, response),
createUswdsScanner({ logger, getCSSRequests }, page)(response),
Expand Down
5 changes: 5 additions & 0 deletions libs/core-scanner/src/scans/third-party.spec.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
import { mock } from 'jest-mock-extended';
import { HTTPRequest, HTTPResponse } from 'puppeteer';
import pino from 'pino';

import { buildThirdPartyResult } from './third-party';

const mockLogger = pino();

describe('third-party scan', () => {
it('non-navigation different domains treated as third-parties', async () => {
expect(
await buildThirdPartyResult(
mockLogger,
mock<HTTPResponse>({
url: () => 'https://www.18f.gov/',
}),
Expand Down Expand Up @@ -40,6 +44,7 @@ describe('third-party scan', () => {
).toEqual({
thirdPartyServiceCount: 2,
thirdPartyServiceDomains: 'google.com,test.com',
thirdPartyServiceUrls: 'https://google.com/,https://test.com/',
});
});
});
59 changes: 51 additions & 8 deletions libs/core-scanner/src/scans/third-party.ts
Original file line number Diff line number Diff line change
@@ -1,27 +1,30 @@
import { uniq } from 'lodash';
import { HTTPRequest, HTTPResponse } from 'puppeteer';
import { Logger } from 'pino';
import { logCount, logTimer } from '../metric-utils';

import { ThirdPartyScan } from 'entities/scan-data.entity';

/**
 * Runs the third-party scan for a page and times it.
 *
 * A child logger tagged `scan: 'third-party-scan'` is created, the third-party
 * domains and third-party URLs are derived from the outbound requests relative
 * to the main response's URL, and the elapsed time is logged under the
 * `third-party-scan.timer` metric.
 *
 * @param parentLogger - Logger from which the scan's child logger is derived.
 * @param mainResponse - Main page response; its URL is the first-party reference.
 *   A falsy response leaves the URL falsy, in which case the helpers' `new URL()`
 *   call will throw.
 * @param outboundRequests - Requests issued while loading the page.
 * @returns The domain list, domain count and URL list of third-party services.
 */
export const buildThirdPartyResult = async (
  parentLogger: Logger,
  mainResponse: HTTPResponse,
  outboundRequests: HTTPRequest[],
): Promise<ThirdPartyScan> => {
  const logger = parentLogger.child({ scan: 'third-party-scan' });
  const timer = logTimer(logger);
  const url = mainResponse && mainResponse.url();

  // Both helpers are synchronous, so they are called without `await`.
  const thirdPartyResult = thirdPartyServices(logger, outboundRequests, url);
  const thirdPartyUrlResult = thirdPartyServicesUrls(logger, outboundRequests, url);

  timer.log({}, 'third-party-scan.timer', `Third-party scan completed in [{metricValue}ms]`);

  return {
    thirdPartyServiceDomains: thirdPartyResult.domains,
    thirdPartyServiceCount: thirdPartyResult.count,
    thirdPartyServiceUrls: thirdPartyUrlResult,
  };
};

const thirdPartyServices = (
outboundRequests: HTTPRequest[],
finalUrl: string,
): {
domains: string;
count: number;
} => {
export function thirdPartyServices ( parentLogger: Logger, outboundRequests: HTTPRequest[], finalUrl: string, ): { domains: string; count: number; } {
const logger = parentLogger.child({ function: 'thirdPartyServices' });
const parsedUrl = new URL(finalUrl);
const thirdPartyDomains = [];

Expand All @@ -32,8 +35,48 @@ const thirdPartyServices = (
}
}
const deduped = uniq(thirdPartyDomains).filter(Boolean).sort();
logCount(logger, { thirdPartyServiceCount: deduped.length }, 'third-party-services.id', 'Third-party services count: {metricValue}');
return {
domains: deduped.join(','),
count: deduped.length,
};
};

/**
 * Collects the distinct third-party URLs requested by a page.
 *
 * A request counts as third-party when it is not a navigation request and its
 * hostname differs from the hostname of `finalUrl`. Query strings are removed
 * from each URL before de-duplication. Missing (null/undefined) requests are
 * skipped, and so are host-less URLs such as `data:` or `blob:`, mirroring the
 * way the domain list drops empty hostnames.
 *
 * @param parentLogger - Logger from which a child logger for this function is derived.
 * @param outboundRequests - Requests issued while loading the page.
 * @param finalUrl - URL of the scanned page; its hostname is the first-party reference.
 * @returns The sorted, de-duplicated URLs joined with commas ('' when there are none).
 * @throws If `finalUrl` or a request URL cannot be parsed.
 */
export function thirdPartyServicesUrls ( parentLogger: Logger, outboundRequests: HTTPRequest[], finalUrl: string ): string {
  const logger = parentLogger.child({ function: 'thirdPartyServicesUrls' });
  const pageHostname = new URL(finalUrl).hostname;
  const thirdPartyUrls: string[] = [];

  for (const request of outboundRequests) {
    if (!request || request.isNavigationRequest()) {
      continue;
    }

    const requestUrl = new URL(request.url());
    // An empty hostname means the URL has no host (e.g. data:, blob:), so it
    // cannot identify a third-party service.
    if (!requestUrl.hostname || requestUrl.hostname === pageHostname) {
      continue;
    }

    thirdPartyUrls.push(removeQueryParameters(requestUrl.toString()));
  }

  const deduped = uniq(thirdPartyUrls).sort();
  // NOTE(review): the metadata does not set `metricValue`, so the logged metric
  // value is logCount's default rather than `deduped.length` — confirm intent.
  logCount(logger, { thirdPartyServicesUrls: deduped.length }, 'third-party-services-url.id', 'Third-party services url count: {metricValue}');
  return deduped.join(',');
}

/**
 * Returns the given URL with its query string removed.
 *
 * Only the query component is cleared; every other part of the URL, including
 * any fragment, is kept as serialized by the URL parser.
 *
 * @param url - Absolute URL to strip.
 * @returns The serialized URL without its query string.
 * @throws Error with message 'Invalid URL' when `url` cannot be parsed.
 */
export function removeQueryParameters(url: string): string {
  let target: URL;
  try {
    target = new URL(url);
  } catch {
    throw new Error('Invalid URL');
  }

  target.search = '';
  return target.href;
}
1 change: 1 addition & 0 deletions libs/database/src/core-results/core-result.service.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ describe('CoreResultService', () => {
thirdPartyServiceDomains: null,
thirdPartyServiceCount: null,
cookieDomains: null,
thirdPartyServiceUrls: null,
},
cookieScan: {
domains: '',
Expand Down
3 changes: 3 additions & 0 deletions libs/database/src/core-results/core-result.service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,8 @@ export class CoreResultService {
result.thirdPartyScan.thirdPartyServiceCount;
coreResult.thirdPartyServiceDomains =
result.thirdPartyScan.thirdPartyServiceDomains;
coreResult.thirdPartyServiceUrls =
result.thirdPartyScan.thirdPartyServiceUrls;

// Cookie scan
coreResult.cookieDomains = result.cookieScan.domains;
Expand Down Expand Up @@ -163,6 +165,7 @@ export class CoreResultService {
coreResult.canonicalLink = null;
coreResult.thirdPartyServiceCount = null;
coreResult.thirdPartyServiceDomains = null;
coreResult.thirdPartyServiceUrls = null;
coreResult.finalUrl = null;
coreResult.finalUrlBaseDomain = null;
coreResult.finalUrlWebsite = null;
Expand Down

0 comments on commit 63a061a

Please sign in to comment.