Skip to content

Commit caad228

Browse files
committed
improved is_email + more tests
1 parent 35a3f91 commit caad228

File tree

2 files changed

+117
-62
lines changed

2 files changed

+117
-62
lines changed

string_utils/validation.py

+5-9
Original file line numberDiff line numberDiff line change
@@ -234,26 +234,22 @@ def is_email(input_string: Any) -> bool:
234234
# we expect 2 tokens, one before "@" and one after, otherwise we have an exception and the email is not valid
235235
head, tail = input_string.split('@')
236236

237+
# head's size must be <= 64, tail <= 255, head must not end with a dot or contain multiple consecutive dots
238+
if len(head) > 64 or len(tail) > 255 or head.endswith('.') or ('..' in head):
239+
return False
240+
237241
# removes escaped spaces, so that later on the test regex will accept the string
238242
head = head.replace('\\ ', '')
239243
if head.startswith('"') and head.endswith('"'):
240244
head = head.replace(' ', '')[1:-1]
241245

242-
if head.endswith('.') or len(head) > 64 or len(tail) > 255:
243-
return False
244-
245-
# multiple consecutive dots are forbidden
246-
if '..' in head:
247-
return False
248-
249246
return EMAIL_RE.match(head + '@' + tail) is not None
250247

251248
except ValueError:
252249
# borderline case in which we have multiple "@" signs but the head part is correctly escaped
253250
if ESCAPED_AT_SIGN.search(input_string) is not None:
254251
# replace "@" with "a" in the head
255-
sanitized = ESCAPED_AT_SIGN.sub('a', input_string)
256-
return is_email(sanitized)
252+
return is_email(ESCAPED_AT_SIGN.sub('a', input_string))
257253

258254
return False
259255

tests/test_is_email.py

+112-53
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,13 @@
44

55

66
class IsEmailTestCase(TestCase):
7+
"""
8+
Definitions:
9+
10+
"head" -> the part before "@"
11+
"tail" -> the part after "@"
12+
"""
13+
714
def test_should_return_false_for_non_string_objects(self):
815
# noinspection PyTypeChecker
916
self.assertFalse(is_email(None))
@@ -24,10 +31,10 @@ def test_string_cannot_be_empty(self):
2431
self.assertFalse(is_email(''))
2532
self.assertFalse(is_email(' '))
2633

27-
def test_domain_part_is_required(self):
34+
def test_tail_is_required(self):
2835
self.assertFalse(is_email('name@'))
2936

30-
def test_name_part_is_required(self):
37+
def test_head_is_required(self):
3138
self.assertFalse(is_email('@foo.com'))
3239

3340
def test_at_sign_is_required(self):
@@ -44,19 +51,19 @@ def test_domain_extension_should_be_letters_only_from_2_to_4_chars(self):
4451
self.assertFalse(is_email('me@foo.___'))
4552
self.assertFalse(is_email('[email protected]'))
4653

47-
def test_name_part_cannot_contain_suqare_brackets(self):
54+
def test_head_cannot_contain_suqare_brackets(self):
4855
self.assertFalse(is_email('[[email protected]'))
4956
self.assertFalse(is_email('my][email protected]'))
5057

51-
def test_domain_part_cannot_contain_bad_signs(self):
58+
def test_tail_cannot_contain_bad_signs(self):
5259
self.assertFalse(is_email('me@#foo#.com'))
5360
self.assertFalse(is_email('me@foo!.com'))
5461
self.assertFalse(is_email('someone@[foo].com'))
5562

56-
def test_domain_part_cannot_be_uppercase(self):
63+
def test_tail_cannot_be_uppercase(self):
5764
self.assertFalse(is_email('[email protected]'))
5865

59-
def test_domain_part_cannot_contain_dots_sequence(self):
66+
def test_tail_cannot_contain_dots_sequence(self):
6067
self.assertFalse(is_email('[email protected]'))
6168
self.assertFalse(is_email('[email protected]'))
6269

@@ -77,79 +84,124 @@ def test_max_email_length_is_respected(self):
7784
invalid_email = ('a' * 320) + '@gmail.com'
7885
self.assertFalse(is_email(invalid_email))
7986

80-
def test_local_part_length_is_respected(self):
81-
# max local part is 64 (before "@")
87+
def test_head_length_is_respected(self):
88+
# max head size is 64
8289
invalid_email = ('a' * 65) + '@gmail.com'
8390
self.assertFalse(is_email(invalid_email))
8491

85-
def test_octects_part_length_is_respected(self):
86-
# max octets part is 255 (after "@")
92+
def test_tail_length_is_respected(self):
93+
# max tail size is 255
8794
invalid_email = 'a@{}.com'.format(255 * 'x')
8895
self.assertFalse(is_email(invalid_email))
8996

90-
def test_plus_is_valid_char_in_local_part(self):
91-
self.assertTrue(is_email("[email protected]"))
97+
def test_plus_is_valid_char_in_head(self):
98+
self.assertTrue(is_email('[email protected]'))
99+
100+
def test_plus_is_invalid_char_in_tail(self):
101+
self.assertFalse(is_email('mymail@foo+bar.com'))
102+
103+
def test_minus_is_valid_char_in_head(self):
104+
self.assertTrue(is_email('[email protected]'))
105+
106+
def test_slash_is_valid_char_in_head(self):
107+
self.assertTrue(is_email('my/mail@gmail.com'))
108+
109+
def test_slash_is_invalid_char_in_tail(self):
110+
self.assertFalse(is_email('mymail@foo/bar.com'))
111+
112+
def test_back_slash_is_valid_char_in_head(self):
113+
self.assertTrue(is_email('my\\[email protected]'))
114+
115+
def test_equal_is_valid_char_in_head(self):
116+
self.assertTrue(is_email('my=mail@gmail.com'))
117+
118+
def test_equal_is_invalid_char_in_tail(self):
119+
self.assertFalse(is_email('mymail@gm=ail.com'))
92120

93-
def test_minus_is_valid_char_in_local_part(self):
94-
self.assertTrue(is_email("my-[email protected]"))
121+
def test_question_mark_is_valid_char_in_head(self):
122+
self.assertTrue(is_email('my?[email protected]'))
95123

96-
def test_slash_is_valid_char_in_local_part(self):
97-
self.assertTrue(is_email("my/mail@gmail.com"))
124+
def test_question_mark_is_invalid_char_in_tail(self):
125+
self.assertFalse(is_email('mymail@gm?ail.com'))
98126

99-
def test_back_slash_is_valid_char_in_local_part(self):
100-
self.assertTrue(is_email("my\\[email protected]"))
127+
def test_sharp_is_valid_char_in_head(self):
128+
self.assertTrue(is_email('my#mail@gmail.com'))
101129

102-
def test_equal_is_valid_char_in_local_part(self):
103-
self.assertTrue(is_email("my=mail@gmail.com"))
130+
def test_sharp_is_invalid_char_in_tail(self):
131+
self.assertFalse(is_email('mymail@gma#il.com'))
104132

105-
def test_question_mark_is_valid_char_in_local_part(self):
106-
self.assertTrue(is_email("my?[email protected]"))
133+
def test_dollar_is_valid_char_in_head(self):
134+
self.assertTrue(is_email('my$[email protected]'))
107135

108-
def test_sharp_is_valid_char_in_local_part(self):
109-
self.assertTrue(is_email("my#mail@gmail.com"))
136+
def test_dollar_is_invalid_char_in_tail(self):
137+
self.assertFalse(is_email('mymail@gm$ail.com'))
110138

111-
def test_dollar_is_valid_char_in_local_part(self):
112-
self.assertTrue(is_email("my$[email protected]"))
139+
def test_and_is_valid_char_in_head(self):
140+
self.assertTrue(is_email('my&mail@gmail.com'))
113141

114-
def test_and_is_valid_char_in_local_part(self):
115-
self.assertTrue(is_email("my&mail@gmail.com"))
142+
def test_and_is_invalid_char_in_tail(self):
143+
self.assertFalse(is_email('mymail@gm&ail.com'))
116144

117-
def test_asterisk_is_valid_char_in_local_part(self):
118-
self.assertTrue(is_email("my*[email protected]"))
145+
def test_asterisk_is_valid_char_in_head(self):
146+
self.assertTrue(is_email('my*[email protected]'))
119147

120-
def test_apostrophe_is_valid_char_in_local_part(self):
121-
self.assertTrue(is_email("my'mail@gmail.com"))
148+
def test_asterisk_is_invalid_char_in_tail(self):
149+
self.assertFalse(is_email('mymail@gmai*l.com'))
122150

123-
def test_acute_accent_is_valid_char_in_local_part(self):
124-
self.assertTrue(is_email("my`[email protected]"))
151+
def test_apostrophe_is_valid_char_in_head(self):
152+
self.assertTrue(is_email('my\'mail@gmail.com'))
125153

126-
def test_percentage_is_valid_char_in_local_part(self):
127-
self.assertTrue(is_email("my%mail@gmail.com"))
154+
def test_apostrophe_is_invalid_char_in_tail(self):
155+
self.assertFalse(is_email('mymail@gma\'il.com'))
128156

129-
def test_exclamation_mark_is_valid_char_in_local_part(self):
130-
self.assertTrue(is_email("my![email protected]"))
157+
def test_acute_accent_is_valid_char_in_head(self):
158+
self.assertTrue(is_email('my`[email protected]'))
131159

132-
def test_caret_is_valid_char_in_local_part(self):
133-
self.assertTrue(is_email("my^mail@gmail.com"))
160+
def test_acute_accent_is_invalid_char_in_tail(self):
161+
self.assertFalse(is_email('mymail@gma`il.com'))
134162

135-
def test_pipe_is_valid_char_in_local_part(self):
136-
self.assertTrue(is_email("my|[email protected]"))
163+
def test_percentage_is_valid_char_in_head(self):
164+
self.assertTrue(is_email('my%mail@gmail.com'))
137165

138-
def test_tilde_is_valid_char_in_local_part(self):
139-
self.assertTrue(is_email("my~mail@gmail.com"))
166+
def test_percentage_is_invalid_char_in_tail(self):
167+
self.assertFalse(is_email('mymail@gma%il.com'))
140168

141-
def test_curly_braces_are_valid_char_in_local_part(self):
142-
self.assertTrue(is_email("my{[email protected]"))
143-
self.assertTrue(is_email("my}[email protected]"))
144-
self.assertTrue(is_email("{mymail}@gmail.com"))
169+
def test_exclamation_mark_is_valid_char_in_head(self):
170+
self.assertTrue(is_email('[email protected]'))
145171

146-
def test_local_part_cannot_start_with_period(self):
172+
def test_exclamation_mark_is_invalid_char_in_tail(self):
173+
self.assertFalse(is_email('mymail@gm!ail.com'))
174+
175+
def test_caret_is_valid_char_in_head(self):
176+
self.assertTrue(is_email('my^mail@gmail.com'))
177+
178+
def test_caret_is_invalid_char_in_tail(self):
179+
self.assertFalse(is_email('mymail@gma^il.com'))
180+
181+
def test_pipe_is_valid_char_in_head(self):
182+
self.assertTrue(is_email('my|[email protected]'))
183+
184+
def test_pipe_is_invalid_char_in_tail(self):
185+
self.assertFalse(is_email('mymail@gm|ail.com'))
186+
187+
def test_tilde_is_valid_char_in_head(self):
188+
self.assertTrue(is_email('my~mail@gmail.com'))
189+
190+
def test_tilde_is_invalid_char_in_tail(self):
191+
self.assertFalse(is_email('mymail@gma~il.com'))
192+
193+
def test_curly_braces_are_valid_char_in_head(self):
194+
self.assertTrue(is_email('my{[email protected]'))
195+
self.assertTrue(is_email('my}[email protected]'))
196+
self.assertTrue(is_email('{mymail}@gmail.com'))
197+
198+
def test_head_cannot_start_with_period(self):
147199
self.assertFalse(is_email('[email protected]'))
148200

149-
def test_local_part_cannot_end_with_period(self):
201+
def test_head_cannot_end_with_period(self):
150202
self.assertFalse(is_email('[email protected]'))
151203

152-
def test_local_part_cannot_have_multiple_consecutive_periods(self):
204+
def test_head_cannot_have_multiple_consecutive_periods(self):
153205
self.assertFalse(is_email('[email protected]'))
154206
self.assertFalse(is_email('[email protected]'))
155207

@@ -158,14 +210,21 @@ def test_empty_spaces_are_allowed_only_if_escaped(self):
158210
self.assertTrue(is_email('my\\ [email protected]'))
159211
self.assertTrue(is_email('"my mail"@gmail.com'))
160212

161-
def test_local_part_can_be_quoted(self):
213+
def test_head_can_be_quoted(self):
162214
self.assertTrue(is_email('"foo"@example.com'))
163215

216+
def test_head_quotes_must_be_correct(self):
217+
self.assertFalse(is_email('"[email protected]'))
218+
self.assertFalse(is_email('no"[email protected]'))
219+
self.assertFalse(is_email('nope"@gmail.com'))
220+
self.assertFalse(is_email('""@gmail.com'))
221+
self.assertFalse(is_email('"no"pe"@gmail.com'))
222+
164223
def test_with_quoted_string_multiple_at_are_accepted(self):
165224
self.assertTrue(is_email('"Abc@def"@example.com'))
166225

167226
def test_with_escape_multiple_at_are_accepted(self):
168227
self.assertTrue(is_email('Abc\\@[email protected]'))
169228

170-
def test_local_part_can_have_self_escape(self):
229+
def test_head_can_have_self_escape(self):
171230
self.assertTrue(is_email('Joe.\\\\[email protected]'))

0 commit comments

Comments
 (0)