Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle child to parent broker migration #3596

Merged
merged 32 commits into from
Jan 16, 2025
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
348bf3e
Handle child broker to parent broker migration
quanganhdo Nov 25, 2024
4853c58
Merge branch 'main' into anh/pir/child-to-parent-broker
quanganhdo Nov 25, 2024
9dfa64d
Fix operation data sorting
quanganhdo Nov 27, 2024
56a777b
Update kwold JSON
quanganhdo Nov 27, 2024
ee69d21
Refactor
quanganhdo Nov 27, 2024
70551e0
Add test cases
quanganhdo Nov 27, 2024
24a8136
Update comments
quanganhdo Nov 27, 2024
0a9a5b5
Merge branch 'main' into anh/pir/child-to-parent-broker
quanganhdo Nov 27, 2024
2ffca99
Use distantFuture for preferred run date
quanganhdo Nov 28, 2024
01ad2c2
Merge branch 'main' into anh/pir/child-to-parent-broker
quanganhdo Nov 28, 2024
173ffef
Fix SwiftLint
quanganhdo Nov 28, 2024
f629dbf
Add tests
quanganhdo Nov 28, 2024
e6b3bdd
Update test cases
quanganhdo Dec 2, 2024
f4f4636
Add optOutReattempt
quanganhdo Dec 2, 2024
4b17818
Merge branch 'main' into anh/pir/child-to-parent-broker
quanganhdo Dec 24, 2024
a6fefee
Update test cases
quanganhdo Dec 24, 2024
d776ea0
Oops
quanganhdo Dec 24, 2024
12de525
Convert Verecor child sites to be parent sites
brianhall Dec 19, 2024
bef9931
Add missing property
quanganhdo Dec 24, 2024
80df190
Fix corrupted JSON
quanganhdo Dec 24, 2024
418b810
Merge branch 'main' into anh/pir/child-to-parent-broker
quanganhdo Jan 7, 2025
c00eedf
Revert to old currentScans calculation logic
quanganhdo Jan 8, 2025
f7cd5ed
Merge branch 'main' into anh/pir/child-to-parent-broker
quanganhdo Jan 9, 2025
2af7e21
Merge branch 'main' into anh/pir/child-to-parent-broker
quanganhdo Jan 9, 2025
09e8f87
Refactor and update tests
quanganhdo Jan 9, 2025
e60fa60
Add comments
quanganhdo Jan 9, 2025
8b0111a
Merge branch 'main' into anh/pir/child-to-parent-broker
quanganhdo Jan 10, 2025
1ccfc6d
Address PR comments
quanganhdo Jan 13, 2025
44350a3
Rename optOutReattempt to hoursUntilNextOptOutAttempt
quanganhdo Jan 14, 2025
e7098de
Merge branch 'main' into anh/pir/child-to-parent-broker
quanganhdo Jan 14, 2025
44bb2aa
Merge branch 'main' into anh/pir/child-to-parent-broker
quanganhdo Jan 14, 2025
e19a0ae
Use kwold from main
quanganhdo Jan 14, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ class DataBrokerOperation: Operation, @unchecked Sendable {
}
}

private func filterAndSortOperationsData(brokerProfileQueriesData: [BrokerProfileQueryData], operationType: OperationType, priorityDate: Date?) -> [BrokerJobData] {
static func filterAndSortOperationsData(brokerProfileQueriesData: [BrokerProfileQueryData], operationType: OperationType, priorityDate: Date?) -> [BrokerJobData] {
let operationsData: [BrokerJobData]

switch operationType {
Expand All @@ -131,8 +131,8 @@ class DataBrokerOperation: Operation, @unchecked Sendable {

if let priorityDate = priorityDate {
filteredAndSortedOperationsData = operationsData
.filter { $0.preferredRunDate != nil && $0.preferredRunDate! <= priorityDate }
.sorted { $0.preferredRunDate! < $1.preferredRunDate! }
.filtered(using: priorityDate)
.sortedByPreferredRunDate()
} else {
filteredAndSortedOperationsData = operationsData
}
Expand All @@ -152,9 +152,9 @@ class DataBrokerOperation: Operation, @unchecked Sendable {

let brokerProfileQueriesData = allBrokerProfileQueryData.filter { $0.dataBroker.id == dataBrokerID }

let filteredAndSortedOperationsData = filterAndSortOperationsData(brokerProfileQueriesData: brokerProfileQueriesData,
operationType: operationType,
priorityDate: priorityDate)
let filteredAndSortedOperationsData = Self.filterAndSortOperationsData(brokerProfileQueriesData: brokerProfileQueriesData,
operationType: operationType,
priorityDate: priorityDate)

Logger.dataBrokerProtection.debug("filteredAndSortedOperationsData count: \(filteredAndSortedOperationsData.count, privacy: .public) for brokerID \(self.dataBrokerID, privacy: .public)")

Expand Down Expand Up @@ -215,3 +215,40 @@ class DataBrokerOperation: Operation, @unchecked Sendable {
}
}
// swiftlint:enable explicit_non_final_class

extension Array where Element == BrokerJobData {
    /// Returns the jobs that are eligible to run relative to `priorityDate`.
    ///
    /// A job is kept when either:
    /// - it has a preferred run date that is on or before `priorityDate`, or
    /// - it has no preferred run date and is an `OptOutJobData`.
    ///
    /// Note: Opt-out jobs without a preferred run date may be:
    /// 1. From child brokers (skipped during runOptOutOperation)
    /// 2. From former child brokers now acting as parent brokers (processed if extractedProfile hasn't been removed)
    ///
    /// - Parameter priorityDate: The latest preferred run date a dated job may have to be included.
    /// - Returns: The eligible jobs, in their original order.
    func filtered(using priorityDate: Date) -> [BrokerJobData] {
        filter { job in
            if let runDate = job.preferredRunDate {
                return runDate <= priorityDate
            }

            // No preferred run date: only opt-out jobs are let through.
            return job is OptOutJobData
        }
    }

    /// Returns the jobs ordered by preferred run date.
    ///
    /// - Jobs with a preferred run date come first, earliest date first.
    /// - Jobs without a preferred run date come last; the comparator never orders two such jobs
    ///   relative to each other, so their relative order is whatever `sorted(by:)` preserves
    ///   for elements it considers equivalent.
    ///
    /// - Returns: A new array sorted as described above.
    func sortedByPreferredRunDate() -> [BrokerJobData] {
        sorted { first, second in
            // A job without a date never precedes anything.
            guard let firstRunDate = first.preferredRunDate else {
                return false
            }

            // A dated job always precedes an undated one.
            guard let secondRunDate = second.preferredRunDate else {
                return true
            }

            return firstRunDate < secondRunDate
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -291,11 +291,11 @@ struct DataBrokerProfileQueryOperationManager: OperationsManager {
}

guard extractedProfile.removedDate == nil else {
Logger.dataBrokerProtection.debug("Profile already extracted, skipping...")
Logger.dataBrokerProtection.debug("Profile already removed, skipping...")
return
}

guard let optOutStep = brokerProfileQueryData.dataBroker.optOutStep(), optOutStep.optOutType != .parentSiteOptOut else {
guard !brokerProfileQueryData.dataBroker.performsOptOutWithinParent() else {
quanganhdo marked this conversation as resolved.
Show resolved Hide resolved
Logger.dataBrokerProtection.debug("Broker opts out in parent, skipping...")
return
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,10 @@ struct OperationPreferredDateCalculator {
return date.now.addingTimeInterval(calculateNextRunDateOnError(schedulingConfig: schedulingConfig, historyEvents: historyEvents))
case .optOutStarted, .scanStarted, .noMatchFound:
return currentPreferredRunDate
case .optOutConfirmed, .optOutRequested:
case .optOutConfirmed:
return nil
case .optOutRequested:
return date.now.addingTimeInterval(schedulingConfig.maintenanceScan.hoursToSeconds)
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,10 @@ extension Date {
/// Returns the current date shifted back by the given number of hours.
///
/// Falls back to the current date if the calendar cannot compute the shifted date.
///
/// - Parameter hours: Number of hours to subtract from now.
/// - Returns: The shifted date, or the current date when the calculation fails.
static func nowMinus(hours: Int) -> Date {
    guard let shiftedDate = Calendar.current.date(byAdding: .hour, value: -hours, to: Date()) else {
        return Date()
    }

    return shiftedDate
}

/// Returns the current date shifted forward by the given number of hours.
///
/// Implemented by delegating to `nowMinus(hours:)` with the negated value.
///
/// - Parameter hours: Number of hours to add to now.
/// - Returns: The result of `nowMinus(hours:)` for `-hours`.
static func nowPlus(hours: Int) -> Date {
    let negatedHours = -hours
    return nowMinus(hours: negatedHours)
}
}

final class DataBrokerProtectionStatsPixels: StatsPixels {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,76 +1,147 @@
{
"name": "Kwold",
"url": "kwold.com",
"version": "0.4.0",
"parent": "verecor.com",
"addedDatetime": 1702965600000,
"optOutUrl": "https://kwold.com/ns/control/privacy",
"steps": [
{
"stepType": "scan",
"scanType": "templatedUrl",
"actions": [
"name": "Kwold",
"url": "kwold.com",
"version": "0.5.0",
"addedDatetime": 1702965600000,
"optOutUrl": "https://kwold.com/ns/control/privacy",
"steps": [
{
"actionType": "navigate",
"id": "878e00ab-dbad-4ca9-a303-645702a36ee2",
"url": "https://kwold.com/profile/search?fname=${firstName}&lname=${lastName}&state=${state}&city=${city}&fage=${age|ageRange}",
"ageRange": [
"18-30",
"31-40",
"41-50",
"51-60",
"61-70",
"71-80",
"81+"
]
"stepType": "scan",
"scanType": "templatedUrl",
"actions": [
{
"actionType": "navigate",
"id": "878e00ab-dbad-4ca9-a303-645702a36ee2",
"url": "https://kwold.com/profile/search?fname=${firstName}&lname=${lastName}&state=${state}&city=${city}&fage=${age|ageRange}",
"ageRange": [
"18-30",
"31-40",
"41-50",
"51-60",
"61-70",
"71-80",
"81+"
]
},
{
"actionType": "extract",
"id": "ec9f8ae6-199e-441b-9722-ffc6737b4595",
"selector": ".card",
"noResultsSelector": "//div[@class='page-404' and h1[starts-with(text(), 'Sorry')]]",
"profile": {
"name": {
"selector": ".card-title",
"beforeText": " ~"
},
"alternativeNamesList": {
"selector": ".//div[@class='card-body']/dl[dt[text()='Known as:']]/dd/ul[@class='list-inline m-0']/li",
"findElements": true
},
"age": {
"beforeText": "years old",
"selector": ".card-title",
"afterText": " ~"
},
"addressCityStateList": {
"selector": ".//div[@class='card-body']/dl[dt[text()='Has lived in:']]/dd/ul[@class='list-inline m-0']/li",
"findElements": true
},
"relativesList": {
"selector": ".//div[@class='card-body']/dl[dt[text()='Related to:']]/dd/ul[@class='list-inline m-0']/li",
"beforeText": ",",
"findElements": true
},
"profileUrl": {
"selector": "a",
"identifierType": "path",
"identifier": "https://kwold.com/pp/${id}"
}
}
}
]
},
{
"actionType": "extract",
"id": "ec9f8ae6-199e-441b-9722-ffc6737b4595",
"selector": ".card",
"noResultsSelector": "//div[@class='page-404' and h1[starts-with(text(), 'Sorry')]]",
"profile": {
"name": {
"selector": ".card-title",
"beforeText": " ~"
},
"alternativeNamesList": {
"selector": ".//div[@class='card-body']/dl[dt[text()='Known as:']]/dd/ul[@class='list-inline m-0']/li",
"findElements": true
},
"age": {
"beforeText": "years old",
"selector": ".card-title",
"afterText": " ~"
},
"addressCityStateList": {
"selector": ".//div[@class='card-body']/dl[dt[text()='Has lived in:']]/dd/ul[@class='list-inline m-0']/li",
"findElements": true
},
"relativesList": {
"selector": ".//div[@class='card-body']/dl[dt[text()='Related to:']]/dd/ul[@class='list-inline m-0']/li",
"beforeText": ",",
"findElements": true
},
"profileUrl": {
"selector": "a",
"identifierType": "path",
"identifier": "https://kwold.com/pp/${id}"
}
}
"stepType": "optOut",
"optOutType": "formOptOut",
"actions": [
{
"actionType": "navigate",
"url": "https://kwold.com/ns/control/privacy",
"id": "037f7920-b9e7-4214-a937-171ec641d641"
},
{
"actionType": "fillForm",
"selector": ".ahm",
"elements": [
{
"type": "fullName",
"selector": "#user_name"
},
{
"type": "email",
"selector": "#user_email"
},
{
"type": "profileUrl",
"selector": "#url"
}
],
"id": "5b9de12f-a52e-4bd0-b6ac-6884377d309b"
},
{
"actionType": "getCaptchaInfo",
"selector": ".g-recaptcha",
"id": "48e5e7a8-af33-4629-a849-2cf926a518a3"
},
{
"actionType": "solveCaptcha",
"selector": ".g-recaptcha",
"id": "bc2d26dc-3eef-478a-a04b-5671a1dbdf8b"
},
{
"actionType": "click",
"elements": [
{
"type": "button",
"selector": ".//button[@type='submit']"
}
],
"id": "7f2a685e-ddad-4c5a-8e80-a6d3a690851f"
},
{
"actionType": "expectation",
"expectations": [
{
"type": "text",
"selector": "body",
"expect": "Your removal request has been received"
}
],
"id": "3a8a6e9d-c9a0-4e59-a8a4-fe4a05f3ce68"
},
{
"actionType": "emailConfirmation",
"pollingTime": 30,
"id": "93ccf84a-a5ce-4dcf-8a78-143610723488"
},
{
"actionType": "expectation",
"expectations": [
{
"type": "text",
"selector": "body",
"expect": "Your information control request has been confirmed."
}
],
"id": "fcddc35b-6298-4f2b-a04c-08a2d6f7ceaa"
}
]
}
]
},
{
"stepType": "optOut",
"optOutType": "parentSiteOptOut",
"actions": []
],
"schedulingConfig": {
"retryError": 48,
"confirmOptOutScan": 72,
"maintenanceScan": 120,
"maxAttempts": -1
}
],
"schedulingConfig": {
"retryError": 48,
"confirmOptOutScan": 72,
"maintenanceScan": 120,
"maxAttempts": -1
}
}
Loading
Loading