Skip to content

Commit

Permalink
fix: enhancing the traffic acquisition data per referrer
Browse files Browse the repository at this point in the history
  • Loading branch information
rpapani committed Sep 14, 2024
1 parent d20e62e commit 682c671
Show file tree
Hide file tree
Showing 6 changed files with 196 additions and 62 deletions.
50 changes: 50 additions & 0 deletions packages/spacecat-shared-rum-api-client/src/common/aggregateFns.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
* governing permissions and limitations under the License.
*/

import { extractTrafficHints, classifyUTMSource } from './traffic.js';

/**
* Calculates the total page views by URL from an array of bundles.
* @param {Array<Object>} bundles - An array of RUM bundles (NOT Flat bundles).
Expand Down Expand Up @@ -52,6 +54,53 @@ function getCTRByUrl(bundles) {
}, {});
}

/**
 * Calculates the Click-Through Rate (CTR) by URL, overall and per channel, where the
 * channel is the value classified from the utm_source of each bundle.
 * CTR is defined as the total number of sessions with at least one click event
 * divided by the total number of pageviews; it is computed once per URL and once
 * per URL/channel pair. Bundles without a classified channel only count towards
 * the URL-level CTR.
 *
 * @param {Array<Object>} bundles - An array of RUM bundles (NOT Flat bundles).
 * @returns {Object} - An object keyed by URL. Each entry has the shape
 * `{ value, channels }`: `value` is the CTR of the URL and `channels` maps every
 * channel seen for that URL to its CTR.
 */
function getCTRByUrlAndChannel(bundles) {
  // First pass: accumulate weighted click sessions and pageviews per URL and per channel.
  const aggregated = bundles.reduce((acc, bundle) => {
    const { url } = bundle;
    const trafficHints = extractTrafficHints(bundle);
    const channel = classifyUTMSource(trafficHints.utmSource);
    if (!acc[url]) {
      acc[url] = { sessionsWithClick: 0, totalPageviews: 0, channels: {} };
    }
    const hasClick = bundle.events.some((event) => event.checkpoint === 'click');

    acc[url].totalPageviews += bundle.weight;
    if (hasClick) {
      acc[url].sessionsWithClick += bundle.weight;
    }
    if (channel) {
      if (!acc[url].channels[channel]) {
        acc[url].channels[channel] = { sessionsWithClick: 0, totalPageviews: 0 };
      }
      acc[url].channels[channel].totalPageviews += bundle.weight;
      if (hasClick) {
        acc[url].channels[channel].sessionsWithClick += bundle.weight;
      }
    }
    return acc;
  }, {});

  // Second pass: turn the counters into ratios. A fresh entry is built for every URL
  // (the result object starts out empty), and the per-channel counters are read by
  // their real property names.
  return Object.fromEntries(
    Object.entries(aggregated).map(([url, { sessionsWithClick, totalPageviews, channels }]) => {
      const channelCTRs = Object.fromEntries(
        Object.entries(channels).map(([channel, counters]) => [
          channel,
          counters.sessionsWithClick / counters.totalPageviews,
        ]),
      );
      return [url, { value: sessionsWithClick / totalPageviews, channels: channelCTRs }];
    }),
  );
}

/**
* Calculates the Click-Through Rate (CTR) average for the entire site.
* CTR is defined as the total number of sessions with at least one click event
Expand All @@ -78,5 +127,6 @@ function getSiteAvgCTR(bundles) {
export {
getSiteAvgCTR,
getCTRByUrl,
getCTRByUrlAndChannel,
pageviewsByUrl,
};
54 changes: 53 additions & 1 deletion packages/spacecat-shared-rum-api-client/src/common/traffic.js
Original file line number Diff line number Diff line change
Expand Up @@ -63,11 +63,21 @@ const sources = {
social: /^\b(ig|fb|x|soc)\b|(.*(meta|tiktok|facebook|snapchat|twitter|igshopping|instagram|linkedin|reddit).*)$/,
search: /^\b(goo)\b|(.*(sea|google|yahoo|bing|yandex|baidu|duckduckgo|brave|ecosia|aol|startpage|ask).*)$/,
video: /youtube|vimeo|twitch|dailymotion|wistia/,
display: /optumib2b|jun|googleads|dv36|dv360|microsoft|flipboard|programmatic|yext|gdn|banner|newsshowcase/,
display: /optumib2b|jun|googleads|dv360|dv36|microsoft|flipboard|programmatic|yext|gdn|banner|newsshowcase/,
affiliate: /brandreward|yieldkit|fashionistatop|partner|linkbux|stylesblog|linkinbio|affiliate/,
email: /sfmc|email/,
};

// Indexes of the matching groups in the regexes above, used to obtain the utm source string.
// For each category, classifyUTMSource walks the listed indexes in order and returns the
// first group that captured text.
const sourceGroupingIndex = {
// group 1: short alias matched at the start (ig, fb, ...); group 3: vendor name found
// anywhere inside the value (group 2 would be the whole value, so it is not listed)
social: [1, 3],
// same group layout as social: 1 = short alias (goo), 3 = vendor name
search: [1, 3],
// the remaining regexes have no capture groups, so index 0 (the matched text) is used
video: [0],
display: [0],
affiliate: [0],
email: [0],
};

// Tracking params - based on the checkpoints we have in rum-enhancer now
// const organicTrackingParams = ['srsltid']; WE DO NOT HAVE THIS AS OF NOW
const paidTrackingParams = ['paid'];
Expand Down Expand Up @@ -160,6 +170,46 @@ const RULES = (domain) => ([
{ type: 'owned', category: 'uncategorized', referrer: any, utmSource: any, utmMedium: any, tracking: any },
]);

/**
 * Collects the traffic-related hints of a single RUM bundle from its events.
 *
 * @param {Object} bundle - A RUM bundle with `url`, `weight` and an `events` array.
 * @returns {Object} `{ url, weight, referrer, utmSource, utmMedium, tracking }`, where
 * - `referrer` is the `source` of the first `enter` event,
 * - `utmSource` / `utmMedium` are the `target` of the first `utm` event whose `source`
 *   is `utm_source` / `utm_medium`,
 * - `tracking` is `'paid'` if a `paid` event exists, otherwise `'email'` if an `email`
 *   event exists.
 * Every hint falls back to an empty string when nothing matches.
 */
export function extractTrafficHints(bundle) {
  const { url, weight, events } = bundle;

  // First event with the given checkpoint (and source, when one is requested);
  // an empty object when there is none, so property reads below stay safe.
  function lookup(checkpoint, source = '') {
    for (const evt of events) {
      const sameCheckpoint = evt.checkpoint === checkpoint;
      const sameSource = !source || evt.source === source;
      if (sameCheckpoint && sameSource) {
        return evt;
      }
    }
    return {};
  }

  const enterEvent = lookup('enter');
  const utmSourceEvent = lookup('utm', 'utm_source');
  const utmMediumEvent = lookup('utm', 'utm_medium');
  const trackingCheckpoint = lookup('paid').checkpoint || lookup('email').checkpoint;

  return {
    url,
    weight,
    referrer: enterEvent.source || '',
    utmSource: utmSourceEvent.target || '',
    utmMedium: utmMediumEvent.target || '',
    tracking: trackingCheckpoint || '',
  };
}

/**
 * Returns the name of the utm source as a single word, for example: facebook instead of
 * facebook.com.
 *
 * The value is tested against every regex in `sources`. For a matching category, the
 * first capture group listed in `sourceGroupingIndex` that holds text is taken. All
 * categories are evaluated, so when several match, the last matching one decides the
 * result.
 *
 * @param {*} utmSource - The raw utm_source value.
 * @returns {string} The classified source, or an empty string when `utmSource` is falsy
 * or no category matches.
 */
export function classifyUTMSource(utmSource) {
  if (!utmSource) return '';

  return Object.entries(sources).reduce((current, [category, pattern]) => {
    const hit = utmSource.match(pattern);
    if (!hit) {
      // No match for this category: keep whatever an earlier category produced.
      return current;
    }
    const groupIndex = sourceGroupingIndex[category].find((candidate) => hit[candidate]);
    return groupIndex === undefined ? '' : hit[groupIndex];
  }, '');
}

export function classifyTrafficSource(url, referrer, utmSource, utmMedium, trackingParams) {
const secondLevelDomain = getSecondLevelDomain(url);
const rules = RULES(secondLevelDomain);
Expand All @@ -174,9 +224,11 @@ export function classifyTrafficSource(url, referrer, utmSource, utmMedium, track
&& rule.utmMedium(sanitize(utmMedium))
&& rule.tracking(trackingParams)
));
const channel = classifyUTMSource(utmSource);

return {
type,
category,
channel,
};
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,27 +11,33 @@
*/

import trafficAcquisition from '../traffic-acquisition.js';
import { getCTRByUrl, getSiteAvgCTR } from '../../common/aggregateFns.js';
import { getCTRByUrlAndChannel, getSiteAvgCTR } from '../../common/aggregateFns.js';

// NOTE(review): usage is not visible in this view; presumably the minimum earned
// pageviews per day checked by hasHighOrganicTraffic. Confirm.
const DAILY_EARNED_THRESHOLD = 5000;
// NOTE(review): usage is not visible in this view; presumably the fraction of the site
// average CTR below which hasLowerCTR treats a page's CTR as low. Confirm.
const CTR_THRESHOLD_RATIO = 0.95;
// Pageviews per day; handler keeps only URLs whose total exceeds interval * this value.
const DAILY_PAGEVIEW_THRESHOLD = 1000;
// Number of channels (those with the highest total traffic) reported per opportunity.
const CHANNELS_TO_CONSIDER = 5;

function convertToOpportunity(traffic) {
const {
url, total, ctr, paid, owned, earned, siteAvgCTR,
url, total, ctr, paid, owned, earned, channels, siteAvgCTR, ctrByUrlAndChannel,
} = traffic;

return {
const topChannels = Object.entries(channels)
.sort((a, b) => b[1].total - a[1].total).slice(0, CHANNELS_TO_CONSIDER);

const opportunity = {
type: 'high-organic-low-ctr',
page: url,
screenshot: '',
trackedPageKPIName: 'Click Through Rate',
trackedPageKPIValue: ctr,
pageViews: total,
samples: total, // todo: get the actual number of samples
siteAverage: siteAvgCTR,
metrics: [{
type: 'traffic',
referrer: '*',
value: {
total,
paid,
Expand All @@ -40,12 +46,35 @@ function convertToOpportunity(traffic) {
},
}, {
type: 'ctr',
referrer: '*',
value: {
page: ctr,
siteAverage: siteAvgCTR,
},
}],
};
opportunity.metrics.push(...topChannels.map(([channel, {
_total, _owned, _earned, _paid,
}]) => {
const trafficMetrics = {
type: 'traffic',
referrer: channel,
value: {
_total,
_owned,
_earned,
_paid,
},
};
const ctrMetrics = {
type: 'ctr',
referrer: channel,
value: {
page: ctrByUrlAndChannel[channel],
},
};
return [trafficMetrics, ctrMetrics];
}));
return opportunity;
}

function hasHighOrganicTraffic(interval, traffic) {
Expand All @@ -61,13 +90,18 @@ function handler(bundles, opts = {}) {
const { interval = 7 } = opts;

const trafficByUrl = trafficAcquisition.handler(bundles);
const ctrByUrl = getCTRByUrl(bundles);
const ctrByUrlAndChannel = getCTRByUrlAndChannel(bundles);
const siteAvgCTR = getSiteAvgCTR(bundles);

return trafficByUrl.filter((traffic) => traffic.total > interval * DAILY_PAGEVIEW_THRESHOLD)
.filter(hasHighOrganicTraffic.bind(null, interval))
.filter((traffic) => hasLowerCTR(ctrByUrl[traffic.url], siteAvgCTR))
.map((traffic) => ({ ...traffic, ctr: ctrByUrl[traffic.url], siteAvgCTR }))
.filter((traffic) => hasLowerCTR(ctrByUrlAndChannel[traffic.url].value, siteAvgCTR))
.map((traffic) => ({
...traffic,
ctr: ctrByUrlAndChannel[traffic.url].value,
siteAvgCTR,
ctrByUrlAndChannel: ctrByUrlAndChannel[traffic.url].channels,
}))
.map(convertToOpportunity);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,35 +10,29 @@
* governing permissions and limitations under the License.
*/

import { classifyTrafficSource } from '../common/traffic.js';
import { classifyTrafficSource, extractTrafficHints } from '../common/traffic.js';

const MAIN_TYPES = ['total', 'paid', 'earned', 'owned'];

/**
 * Derives the traffic hints (referrer, utm parameters, tracking marker) of one RUM bundle.
 *
 * @param {Object} bundle - A RUM bundle with `url`, `weight` and an `events` array.
 * @returns {Object} `{ url, weight, referrer, utmSource, utmMedium, tracking }`; each hint
 * is an empty string when the bundle has no matching event.
 */
function extractHints(bundle) {
  const { events } = bundle;

  // Reads `field` from the first event with the given checkpoint (restricted to `source`
  // when one is passed); yields '' when no event matches or the field is empty.
  const pick = (checkpoint, field, source) => {
    const hit = events.find((evt) => {
      if (evt.checkpoint !== checkpoint) return false;
      return !source || evt.source === source;
    });
    return (hit && hit[field]) || '';
  };

  const hints = { url: bundle.url, weight: bundle.weight };
  hints.referrer = pick('enter', 'source');
  hints.utmSource = pick('utm', 'target', 'utm_source');
  hints.utmMedium = pick('utm', 'target', 'utm_medium');
  // a paid marker takes precedence over an email marker
  hints.tracking = pick('paid', 'checkpoint') || pick('email', 'checkpoint');
  return hints;
}

function collectByUrlAndTrafficSource(acc, { url, weight, trafficSource }) {
function collectByUrlAndTrafficSource(acc, {
url, weight, trafficSource, channel,
}) {
acc[url] = acc[url] || {
total: 0, owned: 0, earned: 0, paid: 0,
total: 0, owned: 0, earned: 0, paid: 0, channels: {},
};
acc[url][trafficSource] = (acc[url][trafficSource] || 0) + weight;
acc[url].total += weight;
acc[url][trafficSource.split(':')[0]] += weight;
const trafficType = trafficSource.split(':')[0];
acc[url][trafficType] += weight;
if (channel) {
if (!acc[url].channels[channel]) {
acc[url].channels[channel] = {
total: 0, owned: 0, earned: 0, paid: 0,
};
}
acc[url].channels[channel].total += weight;
acc[url].channels[channel][trafficType] += weight;
}
return acc;
}

Expand All @@ -50,23 +44,26 @@ function transformFormat(trafficSources) {
owned: value.owned,
paid: value.paid,
sources: Object.entries(value)
.filter(([source]) => !MAIN_TYPES.includes(source))
.filter(([source]) => !MAIN_TYPES.includes(source) && source !== 'channels')
.map(([source, views]) => ({ type: source, views })),
channels: value.channels,
}));
}

function handler(bundles) {
const trafficSources = bundles
.map(extractHints)
.map(extractTrafficHints)
.map((row) => {
const {
type,
category,
channel,
} = classifyTrafficSource(row.url, row.referrer, row.utmSource, row.utmMedium, row.tracking);
return {
url: row.url,
weight: row.weight,
trafficSource: `${type}:${category}`,
channel,
};
})
.reduce(collectByUrlAndTrafficSource, {});
Expand Down
Loading

0 comments on commit 682c671

Please sign in to comment.