From 06d5827f49866b6f6288e893a06cee7ef4737676 Mon Sep 17 00:00:00 2001 From: William Storey Date: Fri, 26 Jan 2024 10:42:26 -0800 Subject: [PATCH 1/9] Normalize equivalent domain names --- CHANGELOG.md | 5 +++++ lib/minfraud/components/email.rb | 16 ++++++++++++++++ spec/components/email_spec.rb | 1 + 3 files changed, 22 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 87cd8628..5a0d3bdc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # Changelog +## v2.5.0 + +* Equivalent domain names are now normalized when `hash_address` is used. + For example, `googlemail.com` will become `gmail.com`. + ## v2.4.0 (2024-01-12) * Ruby 2.7+ is now required. If you're using Ruby 2.5 or 2.6, please use diff --git a/lib/minfraud/components/email.rb b/lib/minfraud/components/email.rb index 1e1652d8..e26b2535 100644 --- a/lib/minfraud/components/email.rb +++ b/lib/minfraud/components/email.rb @@ -113,6 +113,18 @@ def clean_email_address(address) }.freeze private_constant :TYPO_DOMAINS + EQUIVALENT_DOMAINS = { + 'googlemail.com' => 'gmail.com', + 'pm.me' => 'protonmail.com', + 'proton.me' => 'protonmail.com', + 'yandex.by' => 'yandex.ru', + 'yandex.com' => 'yandex.ru', + 'yandex.kz' => 'yandex.ru', + 'yandex.ua' => 'yandex.ru', + 'ya.ru' => 'yandex.ru', + }.freeze + private_constant :EQUIVALENT_DOMAINS + def clean_domain(domain) domain = domain.strip @@ -125,6 +137,10 @@ def clean_domain(domain) domain = TYPO_DOMAINS[domain] end + if EQUIVALENT_DOMAINS.key?(domain) + domain = EQUIVALENT_DOMAINS[domain] + end + domain end end diff --git a/spec/components/email_spec.rb b/spec/components/email_spec.rb index 16d82ca3..385b9b76 100644 --- a/spec/components/email_spec.rb +++ b/spec/components/email_spec.rb @@ -71,6 +71,7 @@ { input: 'Test-foo@gmail.com', output: 'test-foo@gmail.com' }, { input: 'gamil.com@gamil.com', output: 'gamil.com@gmail.com' }, { input: 'Test+alias@bücher.com', output: 'test@xn--bcher-kva.com' }, + { input: 'foo@googlemail.com', output: 'foo@gmail.com' }, ] tests.each do |i| From 6c43d638dc4c8764e6cdc3074b1a990c3ecae515 Mon Sep 17 00:00:00 2001 From: William Storey Date: Fri, 26 Jan 2024 10:47:23 -0800 Subject: [PATCH 2/9] Remove periods from gmail.com local parts --- CHANGELOG.md | 3 +++ lib/minfraud/components/email.rb | 4 ++++ spec/components/email_spec.rb | 3 ++- 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5a0d3bdc..4512cfa5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,9 @@ * Equivalent domain names are now normalized when `hash_address` is used. For example, `googlemail.com` will become `gmail.com`. +* Periods are now removed from `gmail.com` email address local parts when + `hash_address` is used. For example, `f.o.o@gmail.com` will become + `foo@gmail.com`. ## v2.4.0 (2024-01-12) diff --git a/lib/minfraud/components/email.rb b/lib/minfraud/components/email.rb index e26b2535..4d0e8632 100644 --- a/lib/minfraud/components/email.rb +++ b/lib/minfraud/components/email.rb @@ -96,6 +96,10 @@ def clean_email_address(address) local_part.sub!(/\A([^+]+)\+.*\z/, '\1') end + if domain == 'gmail.com' + local_part.gsub!('.', '') + end + "#{local_part}@#{domain}" end diff --git a/spec/components/email_spec.rb b/spec/components/email_spec.rb index 385b9b76..741a0831 100644 --- a/spec/components/email_spec.rb +++ b/spec/components/email_spec.rb @@ -69,9 +69,10 @@ { input: 'Test-foo@yahoo.com', output: 'test@yahoo.com' }, { input: 'Test-foo-foo2@yahoo.com', output: 'test@yahoo.com' }, { input: 'Test-foo@gmail.com', output: 'test-foo@gmail.com' }, - { input: 'gamil.com@gamil.com', output: 'gamil.com@gmail.com' }, + { input: 'gamil.com@gamil.com', output: 'gamilcom@gmail.com' }, { input: 'Test+alias@bücher.com', output: 'test@xn--bcher-kva.com' }, { input: 'foo@googlemail.com', output: 'foo@gmail.com' }, + { input: 'foo.bar@gmail.com', output: 'foobar@gmail.com' }, ] tests.each do |i| From 7497bda08d6db37c0cea250ae4f32ccf05c2ed3c Mon Sep 17 00:00:00 2001 From: William Storey Date: Fri, 26 Jan 2024 12:06:22 -0800 Subject: [PATCH 3/9] Normalize fastmail alias subdomains --- CHANGELOG.md | 3 + lib/minfraud/components/email.rb | 132 +++++++++++++++++++++++++++++++ spec/components/email_spec.rb | 1 + 3 files changed, 136 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4512cfa5..a7c823ef 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,9 @@ * Periods are now removed from `gmail.com` email address local parts when `hash_address` is used. For example, `f.o.o@gmail.com` will become `foo@gmail.com`. +* Fastmail alias subdomain email addresses are now normalized when + `hash_address` is used. For example, `alias@user.fastmail.com` will + become `user@fastmail.com`. ## v2.4.0 (2024-01-12) diff --git a/lib/minfraud/components/email.rb b/lib/minfraud/components/email.rb index 4d0e8632..7332a771 100644 --- a/lib/minfraud/components/email.rb +++ b/lib/minfraud/components/email.rb @@ -100,6 +100,17 @@ def clean_email_address(address) local_part.gsub!('.', '') end + domain_parts = domain.split('.') + if domain_parts.length > 2 + possible_domain = domain_parts[1..].join('.') + if FASTMAIL_DOMAINS.key?(possible_domain) + domain = possible_domain + if local_part != '' + local_part = domain_parts[0] + end + end + end + "#{local_part}@#{domain}" end @@ -129,6 +140,127 @@ def clean_email_address(address) }.freeze private_constant :EQUIVALENT_DOMAINS + FASTMAIL_DOMAINS = { + '123mail.org' => true, + '150mail.com' => true, + '150ml.com' => true, + '16mail.com' => true, + '2-mail.com' => true, + '4email.net' => true, + '50mail.com' => true, + 'airpost.net' => true, + 'allmail.net' => true, + 'bestmail.us' => true, + 'cluemail.com' => true, + 'elitemail.org' => true, + 'emailcorner.net' => true, + 'emailengine.net' => true, + 'emailengine.org' => true, + 'emailgroups.net' => true, + 'emailplus.org' => true, + 'emailuser.net' => true, + 'eml.cc' => true, + 'f-m.fm' => true, + 'fast-email.com' => true, + 'fast-mail.org' => true, + 'fastem.com' => true, + 'fastemail.us' => true, + 'fastemailer.com' => true, + 'fastest.cc' => true, + 'fastimap.com' => true, + 'fastmail.cn' => true, + 'fastmail.co.uk' => true, + 'fastmail.com' => true, + 'fastmail.com.au' => true, + 'fastmail.de' => true, + 'fastmail.es' => true, + 'fastmail.fm' => true, + 'fastmail.fr' => true, + 'fastmail.im' => true, + 'fastmail.in' => true, + 'fastmail.jp' => true, + 'fastmail.mx' => true, + 'fastmail.net' => true, + 'fastmail.nl' => true, + 'fastmail.org' => true, + 'fastmail.se' => true, + 'fastmail.to' => true, + 'fastmail.tw' => true, + 'fastmail.uk' => true, + 'fastmail.us' => true, + 'fastmailbox.net' => true, + 'fastmessaging.com' => true, + 'fea.st' => true, + 'fmail.co.uk' => true, + 'fmailbox.com' => true, + 'fmgirl.com' => true, + 'fmguy.com' => true, + 'ftml.net' => true, + 'h-mail.us' => true, + 'hailmail.net' => true, + 'imap-mail.com' => true, + 'imap.cc' => true, + 'imapmail.org' => true, + 'inoutbox.com' => true, + 'internet-e-mail.com' => true, + 'internet-mail.org' => true, + 'internetemails.net' => true, + 'internetmailing.net' => true, + 'jetemail.net' => true, + 'justemail.net' => true, + 'letterboxes.org' => true, + 'mail-central.com' => true, + 'mail-page.com' => true, + 'mailandftp.com' => true, + 'mailas.com' => true, + 'mailbolt.com' => true, + 'mailc.net' => true, + 'mailcan.com' => true, + 'mailforce.net' => true, + 'mailftp.com' => true, + 'mailhaven.com' => true, + 'mailingaddress.org' => true, + 'mailite.com' => true, + 'mailmight.com' => true, + 'mailnew.com' => true, + 'mailsent.net' => true, + 'mailservice.ms' => true, + 'mailup.net' => true, + 'mailworks.org' => true, + 'ml1.net' => true, + 'mm.st' => true, + 'myfastmail.com' => true, + 'mymacmail.com' => true, + 'nospammail.net' => true, + 'ownmail.net' => true, + 'petml.com' => true, + 'postinbox.com' => true, + 'postpro.net' => true, + 'proinbox.com' => true, + 'promessage.com' => true, + 'realemail.net' => true, + 'reallyfast.biz' => true, + 'reallyfast.info' => true, + 'rushpost.com' => true, + 'sent.as' => true, + 'sent.at' => true, + 'sent.com' => true, + 'speedpost.net' => true, + 'speedymail.org' => true, + 'ssl-mail.com' => true, + 'swift-mail.com' => true, + 'the-fastest.net' => true, + 'the-quickest.com' => true, + 'theinternetemail.com' => true, + 'veryfast.biz' => true, + 'veryspeedy.net' => true, + 'warpmail.net' => true, + 'xsmail.com' => true, + 'yepmail.net' => true, + 'your-mail.com' => true, + }.freeze + private_constant :FASTMAIL_DOMAINS + def clean_domain(domain) domain = domain.strip diff --git a/spec/components/email_spec.rb b/spec/components/email_spec.rb index 741a0831..3bbfb999 100644 --- a/spec/components/email_spec.rb +++ b/spec/components/email_spec.rb @@ -73,6 +73,7 @@ { input: 'Test+alias@bücher.com', output: 'test@xn--bcher-kva.com' }, { input: 'foo@googlemail.com', output: 'foo@gmail.com' }, { input: 'foo.bar@gmail.com', output: 'foobar@gmail.com' }, + { input: 'alias@user.fastmail.com', output: 'user@fastmail.com' }, ] tests.each do |i| From 78939dbb5d70b4fd18f2a52c58b08fed7b072a89 Mon Sep 17 00:00:00 2001 From: William Storey Date: Fri, 26 Jan 2024 12:18:14 -0800 Subject: [PATCH 4/9] Remove alias parts from additional yahoo domains --- CHANGELOG.md | 4 +++ lib/minfraud/components/email.rb | 59 +++++++++++++++++++++++++++++++- spec/components/email_spec.rb | 1 + 3 files changed, 63 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a7c823ef..1f8cf996 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,10 @@ * Fastmail alias subdomain email addresses are now normalized when `hash_address` is used. For example, `alias@user.fastmail.com` will become `user@fastmail.com`. +* Additional `yahoo.com` email addresses now have aliases removed from + their local part when `hash_address` is used. For example, + `foo-bar@yahoo.com` will become `foo@yahoo.com` for additional + `yahoo.com` domains. ## v2.4.0 (2024-01-12) diff --git a/lib/minfraud/components/email.rb b/lib/minfraud/components/email.rb index 7332a771..1fe5a8bb 100644 --- a/lib/minfraud/components/email.rb +++ b/lib/minfraud/components/email.rb @@ -90,7 +90,7 @@ def clean_email_address(address) domain = clean_domain(domain) - if domain == 'yahoo.com' + if YAHOO_DOMAINS.key?(domain) local_part.sub!(/\A([^-]+)-.*\z/, '\1') else local_part.sub!(/\A([^+]+)\+.*\z/, '\1') @@ -261,6 +261,63 @@ def clean_email_address(address) }.freeze private_constant :FASTMAIL_DOMAINS + YAHOO_DOMAINS = { + 'y7mail.com' => true, + 'yahoo.at' => true, + 'yahoo.be' => true, + 'yahoo.bg' => true, + 'yahoo.ca' => true, + 'yahoo.cl' => true, + 'yahoo.co.id' => true, + 'yahoo.co.il' => true, + 'yahoo.co.in' => true, + 'yahoo.co.kr' => true, + 'yahoo.co.nz' => true, + 'yahoo.co.th' => true, + 'yahoo.co.uk' => true, + 'yahoo.co.za' => true, + 'yahoo.com' => true, + 'yahoo.com.ar' => true, + 'yahoo.com.au' => true, + 'yahoo.com.br' => true, + 'yahoo.com.co' => true, + 'yahoo.com.hk' => true, + 'yahoo.com.hr' => true, + 'yahoo.com.mx' => true, + 'yahoo.com.my' => true, + 'yahoo.com.pe' => true, + 'yahoo.com.ph' => true, + 'yahoo.com.sg' => true, + 'yahoo.com.tr' => true, + 'yahoo.com.tw' => true, + 'yahoo.com.ua' => true, + 'yahoo.com.ve' => true, + 'yahoo.com.vn' => true, + 'yahoo.cz' => true, + 'yahoo.de' => true, + 'yahoo.dk' => true, + 'yahoo.ee' => true, + 'yahoo.es' => true, + 'yahoo.fi' => true, + 'yahoo.fr' => true, + 'yahoo.gr' => true, + 'yahoo.hu' => true, + 'yahoo.ie' => true, + 'yahoo.in' => true, + 'yahoo.it' => true, + 'yahoo.lt' => true, + 'yahoo.lv' => true, + 'yahoo.nl' => true, + 'yahoo.no' => true, + 'yahoo.pl' => true, + 'yahoo.pt' => true, + 'yahoo.ro' => true, + 'yahoo.se' => true, + 'yahoo.sk' => true, + 'ymail.com' => true, + }.freeze + private_constant :YAHOO_DOMAINS + def clean_domain(domain) domain = domain.strip diff --git a/spec/components/email_spec.rb b/spec/components/email_spec.rb index 3bbfb999..34860081 100644 --- a/spec/components/email_spec.rb +++ b/spec/components/email_spec.rb @@ -74,6 +74,7 @@ { input: 'foo@googlemail.com', output: 'foo@gmail.com' }, { input: 'foo.bar@gmail.com', output: 'foobar@gmail.com' }, { input: 'alias@user.fastmail.com', output: 'user@fastmail.com' }, + { input: 'foo-bar@ymail.com', output: 'foo@ymail.com' }, ] tests.each do |i| From 9c832ae9ed1f61ed59b5fd4260fb2032b7fc76e5 Mon Sep 17 00:00:00 2001 From: William Storey Date: Fri, 26 Jan 2024 12:23:38 -0800 Subject: [PATCH 5/9] Remove duplicate .com strings --- CHANGELOG.md | 3 +++ lib/minfraud/components/email.rb | 2 ++ spec/components/email_spec.rb | 1 + 3 files changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1f8cf996..f3694b46 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,9 @@ their local part when `hash_address` is used. For example, `foo-bar@yahoo.com` will become `foo@yahoo.com` for additional `yahoo.com` domains. +* Duplicate `.com`s are now removed from email domain names when + `hash_address` is used. For example, `example.com.com` will become + `example.com`. ## v2.4.0 (2024-01-12) diff --git a/lib/minfraud/components/email.rb b/lib/minfraud/components/email.rb index 1fe5a8bb..fe371eee 100644 --- a/lib/minfraud/components/email.rb +++ b/lib/minfraud/components/email.rb @@ -326,6 +326,8 @@ def clean_domain(domain) domain = SimpleIDN.to_ascii(domain) + domain.sub!(/(?:\.com){2,}$/, '.com') + if TYPO_DOMAINS.key?(domain) domain = TYPO_DOMAINS[domain] end diff --git a/spec/components/email_spec.rb b/spec/components/email_spec.rb index 34860081..d8fd2054 100644 --- a/spec/components/email_spec.rb +++ b/spec/components/email_spec.rb @@ -75,6 +75,7 @@ { input: 'foo.bar@gmail.com', output: 'foobar@gmail.com' }, { input: 'alias@user.fastmail.com', output: 'user@fastmail.com' }, { input: 'foo-bar@ymail.com', output: 'foo@ymail.com' }, + { input: 'foo@example.com.com', output: 'foo@example.com' }, ] tests.each do |i| From 4cf2c56517ef2a60c1d29aebc47456305b803303 Mon Sep 17 00:00:00 2001 From: William Storey Date: Fri, 26 Jan 2024 12:26:09 -0800 Subject: [PATCH 6/9] Remove extraneous characters after .com --- CHANGELOG.md | 3 +++ lib/minfraud/components/email.rb | 1 + spec/components/email_spec.rb | 3 ++- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f3694b46..4766a932 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,9 @@ * Duplicate `.com`s are now removed from email domain names when `hash_address` is used. For example, `example.com.com` will become `example.com`. +* Extraneous characters after `.com` are now removed from email domain + names when `hash_address` is used. For example, `example.comfoo` will + become `example.com`. ## v2.4.0 (2024-01-12) diff --git a/lib/minfraud/components/email.rb b/lib/minfraud/components/email.rb index fe371eee..380855ab 100644 --- a/lib/minfraud/components/email.rb +++ b/lib/minfraud/components/email.rb @@ -327,6 +327,7 @@ def clean_domain(domain) domain = SimpleIDN.to_ascii(domain) domain.sub!(/(?:\.com){2,}$/, '.com') + domain.sub!(/\.com[^.]+$/, '.com') if TYPO_DOMAINS.key?(domain) domain = TYPO_DOMAINS[domain] diff --git a/spec/components/email_spec.rb b/spec/components/email_spec.rb index d8fd2054..86664fc8 100644 --- a/spec/components/email_spec.rb +++ b/spec/components/email_spec.rb @@ -63,7 +63,7 @@ { input: ' Test@maxmind.com', output: 'test@maxmind.com' }, { input: 'Test@maxmind.com|abc124472372', - output: 'test@maxmind.com|abc124472372', + output: 'test@maxmind.com', }, { input: 'Test+foo@yahoo.com', output: 'test+foo@yahoo.com' }, { input: 'Test-foo@yahoo.com', output: 'test@yahoo.com' }, @@ -76,6 +76,7 @@ { input: 'alias@user.fastmail.com', output: 'user@fastmail.com' }, { input: 'foo-bar@ymail.com', output: 'foo@ymail.com' }, { input: 'foo@example.com.com', output: 'foo@example.com' }, + { input: 'foo@example.comfoo', output: 'foo@example.com' }, ] tests.each do |i| From e3a87df12bd7c19e462e07fdc0b96d8e0934d47b Mon Sep 17 00:00:00 2001 From: William Storey Date: Fri, 26 Jan 2024 12:28:44 -0800 Subject: [PATCH 7/9] Normalize some .com typos --- CHANGELOG.md | 2 ++ lib/minfraud/components/email.rb | 1 + spec/components/email_spec.rb | 1 + 3 files changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4766a932..e44783d5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,8 @@ * Extraneous characters after `.com` are now removed from email domain names when `hash_address` is used. For example, `example.comfoo` will become `example.com`. +* Certain `.com` typos are now normalized to `.com` when `hash_address` is + used. For example, `example.cam` will become `example.com`. ## v2.4.0 (2024-01-12) diff --git a/lib/minfraud/components/email.rb b/lib/minfraud/components/email.rb index 380855ab..a52c2c8e 100644 --- a/lib/minfraud/components/email.rb +++ b/lib/minfraud/components/email.rb @@ -328,6 +328,7 @@ def clean_domain(domain) domain.sub!(/(?:\.com){2,}$/, '.com') domain.sub!(/\.com[^.]+$/, '.com') + domain.sub!(/(?:\.(?:com|c[a-z]{1,2}m|co[ln]|[dsvx]o[mn]|))$/, '.com') if TYPO_DOMAINS.key?(domain) domain = TYPO_DOMAINS[domain] diff --git a/spec/components/email_spec.rb b/spec/components/email_spec.rb index 86664fc8..6681ca92 100644 --- a/spec/components/email_spec.rb +++ b/spec/components/email_spec.rb @@ -77,6 +77,7 @@ { input: 'foo-bar@ymail.com', output: 'foo@ymail.com' }, { input: 'foo@example.com.com', output: 'foo@example.com' }, { input: 'foo@example.comfoo', output: 'foo@example.com' }, + { input: 'foo@example.cam', output: 'foo@example.com' }, ] tests.each do |i| From 7549803c82e35e92cb4975c4cdd8c43cac390505 Mon Sep 17 00:00:00 2001 From: William Storey Date: Fri, 26 Jan 2024 12:34:06 -0800 Subject: [PATCH 8/9] Normalize more gmail leading digit domains --- CHANGELOG.md | 3 +++ lib/minfraud/components/email.rb | 1 + spec/components/email_spec.rb | 1 + 3 files changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e44783d5..07ab95df 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,9 @@ become `example.com`. * Certain `.com` typos are now normalized to `.com` when `hash_address` is used. For example, `example.cam` will become `example.com`. +* Additional `gmail.com` domain names with leading digits are now + normalized when `hash_address` is used. For example, `100gmail.com` will + become `gmail.com`. ## v2.4.0 (2024-01-12) diff --git a/lib/minfraud/components/email.rb b/lib/minfraud/components/email.rb index a52c2c8e..5db00f42 100644 --- a/lib/minfraud/components/email.rb +++ b/lib/minfraud/components/email.rb @@ -329,6 +329,7 @@ def clean_domain(domain) domain.sub!(/(?:\.com){2,}$/, '.com') domain.sub!(/\.com[^.]+$/, '.com') domain.sub!(/(?:\.(?:com|c[a-z]{1,2}m|co[ln]|[dsvx]o[mn]|))$/, '.com') + domain.sub!(/^\d+(?:gmail?\.com)$/, 'gmail.com') if TYPO_DOMAINS.key?(domain) domain = TYPO_DOMAINS[domain] diff --git a/spec/components/email_spec.rb b/spec/components/email_spec.rb index 6681ca92..4c4c4b49 100644 --- a/spec/components/email_spec.rb +++ b/spec/components/email_spec.rb @@ -78,6 +78,7 @@ { input: 'foo@example.com.com', output: 'foo@example.com' }, { input: 'foo@example.comfoo', output: 'foo@example.com' }, { input: 'foo@example.cam', output: 'foo@example.com' }, + { input: 'foo@10000gmail.com', output: 'foo@gmail.com' }, ] tests.each do |i| From 86cc6bfa43d584acfa18eff95cf3b9c20ef9b174 Mon Sep 17 00:00:00 2001 From: William Storey Date: Fri, 26 Jan 2024 12:35:33 -0800 Subject: [PATCH 9/9] Update typo domains map --- CHANGELOG.md | 2 ++ lib/minfraud/components/email.rb | 9 ++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 07ab95df..10deed6a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,8 @@ * Additional `gmail.com` domain names with leading digits are now normalized when `hash_address` is used. For example, `100gmail.com` will become `gmail.com`. +* Additional `gmail.com` typos are now normalized when `hash_address` is + used. For example, `gmali.com` will become `gmail.com`. ## v2.4.0 (2024-01-12) diff --git a/lib/minfraud/components/email.rb b/lib/minfraud/components/email.rb index 5db00f42..c4fecc3f 100644 --- a/lib/minfraud/components/email.rb +++ b/lib/minfraud/components/email.rb @@ -116,12 +116,15 @@ def clean_email_address(address) TYPO_DOMAINS = { # gmail.com - '35gmai.com' => 'gmail.com', - '636gmail.com' => 'gmail.com', + 'gmai.com' => 'gmail.com', 'gamil.com' => 'gmail.com', - 'gmail.comu' => 'gmail.com', + 'gmali.com' => 'gmail.com', 'gmial.com' => 'gmail.com', 'gmil.com' => 'gmail.com', + 'gmaill.com' => 'gmail.com', + 'gmailm.com' => 'gmail.com', + 'gmailo.com' => 'gmail.com', + 'gmailyhoo.com' => 'gmail.com', 'yahoogmail.com' => 'gmail.com', # outlook.com 'putlook.com' => 'outlook.com',