Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: allow config of before_send function to edit or reject events #1515

Merged
merged 31 commits into from
Nov 19, 2024
Merged
Changes from 1 commit
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
79b83fa
first bit of type fangling
pauldambra Nov 8, 2024
5165110
draw the rest of the owl
pauldambra Nov 8, 2024
19abae0
Refactor tests
pauldambra Nov 8, 2024
e95364c
fiddle
pauldambra Nov 8, 2024
08b6741
kind to IE11
pauldambra Nov 8, 2024
54d3031
Add some prebuilt functions
pauldambra Nov 10, 2024
070413d
Refactor
pauldambra Nov 10, 2024
2d7b7ed
mock random
pauldambra Nov 11, 2024
4940a23
more events are uneditable
pauldambra Nov 11, 2024
4bac476
rename the function
pauldambra Nov 11, 2024
0efff91
failing events
pauldambra Nov 11, 2024
31244a1
reorg things
pauldambra Nov 11, 2024
db0228b
test doesn't make sense undefined is valid for
pauldambra Nov 11, 2024
d20d0b7
maybe just a warning
pauldambra Nov 11, 2024
56fb955
some more can be unsafe
pauldambra Nov 12, 2024
ba4f6ca
this is unsafe but editable too
pauldambra Nov 12, 2024
4f5b308
this is unsafe but editable too
pauldambra Nov 12, 2024
4afa6c8
ok, process it all :see-no-evil:
pauldambra Nov 13, 2024
3c018f2
Remove out-of-date tests
pauldambra Nov 13, 2024
938a162
deprecate and stop using _onCapture
pauldambra Nov 13, 2024
0cdbd19
oops
pauldambra Nov 13, 2024
aa6df59
ooops
pauldambra Nov 13, 2024
a395047
merge branch 'main' into feat/before-capture
pauldambra Nov 15, 2024
8c76336
sampling example changes
pauldambra Nov 15, 2024
3a4b136
make before_send config explicit
pauldambra Nov 15, 2024
371f90a
allow array of functions
pauldambra Nov 15, 2024
96b2356
A little comment
pauldambra Nov 15, 2024
a44429e
corrected thresholds tracking
pauldambra Nov 15, 2024
482cfa8
reorganise
pauldambra Nov 15, 2024
3eb6d97
Merge branch 'main' into feat/before-capture
pauldambra Nov 18, 2024
d190358
Merge branch 'main' into feat/before-capture
pauldambra Nov 19, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Add some prebuilt functions
pauldambra committed Nov 10, 2024
commit 54d3031e6d2b6644b01f5c64e3939c827a77810b
12 changes: 9 additions & 3 deletions src/__tests__/posthog-core.beforeCapture.test.ts
Original file line number Diff line number Diff line change
@@ -48,20 +48,26 @@ describe('posthog core - before capture', () => {
beforeCapture: rejectingEventFn,
})
;(posthog._send_request as jest.Mock).mockClear()

const capturedData = posthog.capture(eventName, {}, {})

expect(capturedData).toBeUndefined()
expect(posthog._send_request).not.toHaveBeenCalled()
expect(jest.mocked(logger).info).toHaveBeenCalledWith(
`Event '${eventName}' was rejected in beforeCapture function`
)
})

it('can edit an event', () => {
const posthog = posthogWith({
beforeCapture: editingEventFn,
})
;(posthog._send_request as jest.Mock).mockClear()

const capturedData = posthog.capture(eventName, {}, {})

expect(capturedData).toHaveProperty(['properties', 'edited'], true)
expect(capturedData).toHaveProperty(['$set', 'edited'], true)

expect(posthog._send_request).toHaveBeenCalledWith({
batchKey: undefined,
callback: expect.any(Function),
@@ -112,8 +118,8 @@ describe('posthog core - before capture', () => {

posthog.capture(randomUnsafeEditableEvent, {}, {})

expect(jest.mocked(logger).info).toHaveBeenCalledWith(
`Event '${randomUnsafeEditableEvent}' was rejected. This can cause unexpected behavior.`
expect(jest.mocked(logger).warn).toHaveBeenCalledWith(
`Event '${randomUnsafeEditableEvent}' was rejected in beforeCapture function. This can cause unexpected behavior.`
)
})
})
57 changes: 57 additions & 0 deletions src/__tests__/utils/before-capture-utils.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import { sampleByDistinctId, sampleByEvent, sampleBySessionId } from '../../utils/before-capture.utils'
import { CaptureResult } from '../../types'
import { isNull } from '../../utils/type-utils'

/**
 * Asserts that the number of emitted events lies in the inclusive range 40..60.
 * Intended for runs of 100 candidate events sampled at 50%.
 *
 * @param emittedEvents the events that survived sampling
 */
function expectRoughlyFiftyPercent(emittedEvents: any[]) {
    const emittedCount = emittedEvents.length
    // lower bound first, then upper bound, so a failure reports which side was missed
    expect(emittedCount).toBeGreaterThanOrEqual(40)
    expect(emittedCount).toBeLessThanOrEqual(60)
}

describe('before capture utils', () => {
    it('can sample by event name', () => {
        // sampleByEvent decides with Math.random(); with real randomness 100 draws
        // occasionally land outside 40..60 and the test flakes.
        // Alternate between a value below and a value above the 50% threshold so
        // exactly half of the events are kept on every run.
        let keepNext = true
        const randomSpy = jest.spyOn(Math, 'random').mockImplementation(() => {
            const value = keepNext ? 0.25 : 0.75
            keepNext = !keepNext
            return value
        })

        try {
            const sampleFn = sampleByEvent(['$autocapture'], 50)
            const results = []
            Array.from({ length: 100 }).forEach(() => {
                const captureResult = { event: '$autocapture' } as unknown as CaptureResult
                results.push(sampleFn(captureResult))
            })
            const emittedEvents = results.filter((r) => !isNull(r))
            expectRoughlyFiftyPercent(emittedEvents)
        } finally {
            // always restore the real Math.random so other tests are unaffected
            randomSpy.mockRestore()
        }
    })

    it('can sample by distinct id', () => {
        // Sampling by distinct id is hash based, so it is all-or-nothing per id:
        // one id below is expected to always be kept and the other always dropped.
        const sampleFn = sampleByDistinctId(50)
        const results = []
        const distinct_id_one = 'user-1'
        const distinct_id_two = 'user-that-hashes-to-no-events'
        Array.from({ length: 100 }).forEach(() => {
            ;[distinct_id_one, distinct_id_two].forEach((distinct_id) => {
                const captureResult = { properties: { distinct_id } } as unknown as CaptureResult
                results.push(sampleFn(captureResult))
            })
        })
        const distinctIdOneEvents = results.filter((r) => !isNull(r) && r.properties.distinct_id === distinct_id_one)
        const distinctIdTwoEvents = results.filter((r) => !isNull(r) && r.properties.distinct_id === distinct_id_two)

        expect(distinctIdOneEvents.length).toBe(100)
        expect(distinctIdTwoEvents.length).toBe(0)
    })

    it('can sample by session id', () => {
        // Same all-or-nothing expectation as for distinct ids, keyed on $session_id.
        const sampleFn = sampleBySessionId(50)
        const results = []
        const session_id_one = 'a-session-id'
        const session_id_two = 'id-that-hashes-to-not-sending-events'
        Array.from({ length: 100 }).forEach(() => {
            ;[session_id_one, session_id_two].forEach((session_id) => {
                const captureResult = { properties: { $session_id: session_id } } as unknown as CaptureResult
                results.push(sampleFn(captureResult))
            })
        })
        const sessionIdOneEvents = results.filter((r) => !isNull(r) && r.properties.$session_id === session_id_one)
        const sessionIdTwoEvents = results.filter((r) => !isNull(r) && r.properties.$session_id === session_id_two)

        expect(sessionIdOneEvents.length).toBe(100)
        expect(sessionIdTwoEvents.length).toBe(0)
    })
})
5 changes: 4 additions & 1 deletion src/posthog-core.ts
Original file line number Diff line number Diff line change
@@ -883,8 +883,11 @@ export class PostHog {
if (!isKnownUnEditableEvent(data.event)) {
const beforeCaptureResult = this.config.beforeCapture(data)
if (isNullish(beforeCaptureResult)) {
const logMessage = `Event '${data.event}' was rejected in beforeCapture function`
if (isKnownUnsafeEditableEvent(data.event)) {
logger.info(`Event '${data.event}' was rejected. This can cause unexpected behavior.`)
logger.warn(`${logMessage}. This can cause unexpected behavior.`)
} else {
logger.info(logMessage)
}
return
} else {
6 changes: 4 additions & 2 deletions src/types.ts
Original file line number Diff line number Diff line change
@@ -41,9 +41,11 @@ export const knownUnsafeEditableEvent = [
export type KnownUnsafeEditableEvent = typeof knownUnsafeEditableEvent[number]

/**
* These known events can be processed by the `beforeCapture` function
* These are known PostHog events that can be processed by the `beforeCapture` function
* That means PostHog functionality does not rely on receiving 100% of these for calculations
* So, it is safe to sample them to reduce the volume of events sent to PostHog
*/
type KnownEventName =
export type KnownEventName =
| '$heatmaps_data'
| '$opt_in'
| '$exception'
64 changes: 64 additions & 0 deletions src/utils/before-capture.utils.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import { clampToRange } from './number-utils'
import { CaptureResult, KnownEventName } from '../types'
import { includes } from './index'

/**
 * Computes a deterministic, non-negative integer hash of a string.
 *
 * Each UTF-16 code unit is folded in as `acc * 31 + codeUnit`, with the
 * accumulator truncated to a signed 32-bit integer after every step.
 *
 * @param str the string to hash
 * @returns the absolute value of the final 32-bit accumulator (0 for an empty string)
 */
function simpleHash(str: string) {
    let acc = 0
    let idx = 0
    while (idx < str.length) {
        // `| 0` wraps the running value back into the signed 32-bit range
        acc = (acc * 31 + str.charCodeAt(idx)) | 0
        idx += 1
    }
    return Math.abs(acc)
}

/**
 * Builds a sampling function keyed on the event's distinct ID.
 * The returned function can be used as a beforeCapture fn for a PostHog instance.
 *
 * The decision is made by hashing `properties.distinct_id` with `simpleHash`,
 * so a given distinct id is either always kept or always dropped:
 * roughly `percent`% of distinct ids have their events sent,
 * not `percent`% of the events of each distinct id.
 *
 * NOTE(review): `simpleHash` reads `.length` of its argument, so an event without
 * a string `distinct_id` will throw here - confirm callers always provide one.
 *
 * @param percent a number from 0 to 100 (passed through `clampToRange(percent, 0, 100)`),
 * 100 means never sample, 0 means never send the event
 * @returns a function that returns the event when it is kept, or null when it is dropped
 */
export function sampleByDistinctId(percent: number): (c: CaptureResult) => CaptureResult | null {
    return (captureResult: CaptureResult): CaptureResult | null => {
        // bucket the id into 0..99, then keep it when the bucket is under the threshold
        const bucket = simpleHash(captureResult.properties.distinct_id) % 100
        const threshold = clampToRange(percent, 0, 100)
        if (bucket < threshold) {
            return captureResult
        }
        return null
    }
}

/**
 * Builds a sampling function keyed on the event's session ID.
 * The returned function can be used as a beforeCapture fn for a PostHog instance.
 *
 * The decision is made by hashing `properties.$session_id` with `simpleHash`,
 * so a given session is either always kept or always dropped:
 * roughly `percent`% of sessions have their events sent,
 * not `percent`% of the events of each session.
 *
 * NOTE(review): `simpleHash` reads `.length` of its argument, so an event without
 * a string `$session_id` will throw here - confirm callers always provide one.
 *
 * @param percent a number from 0 to 100 (passed through `clampToRange(percent, 0, 100)`),
 * 100 means never sample, 0 means never send the event
 * @returns a function that returns the event when it is kept, or null when it is dropped
 */
export function sampleBySessionId(percent: number): (c: CaptureResult) => CaptureResult | null {
    return (captureResult: CaptureResult): CaptureResult | null => {
        // bucket the session into 0..99, then keep it when the bucket is under the threshold
        const bucket = simpleHash(captureResult.properties.$session_id) % 100
        const threshold = clampToRange(percent, 0, 100)
        if (bucket < threshold) {
            return captureResult
        }
        return null
    }
}

/**
 * Builds a sampling function keyed on the event name.
 * The returned function can be used as a beforeCapture fn for a PostHog instance.
 *
 * Events whose name is not in `eventNames` are always passed through untouched.
 * For listed events a fresh `Math.random()` draw decides each event independently.
 *
 * @param eventNames an array of event names to sample, sampling is applied across events not per event name
 * @param percent a number from 0 to 100 (passed through `clampToRange(percent, 0, 100)`),
 * 100 means never sample, 0 means never send the event
 * @returns a function that returns the event when it is kept, or null when it is dropped
 */
export function sampleByEvent(
    eventNames: KnownEventName[],
    percent: number
): (c: CaptureResult) => CaptureResult | null {
    return (captureResult: CaptureResult): CaptureResult | null => {
        const isSampledEvent = includes(eventNames, captureResult.event)
        if (isSampledEvent) {
            // scale the random draw to 0..100 and compare against the threshold
            const isKept = Math.random() * 100 < clampToRange(percent, 0, 100)
            return isKept ? captureResult : null
        }

        // not one of the sampled event names: always send
        return captureResult
    }
}