Skip to content

Commit

Permalink
Ensure UTF-8 surrogates escaped on save - fix coddingtonbear#159
Browse files Browse the repository at this point in the history
  • Loading branch information
ad-m committed Jan 3, 2018
1 parent 7252295 commit f0c1cc1
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 4 deletions.
9 changes: 6 additions & 3 deletions django_mailbox/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,6 @@ def _get_dehydrated_message(self, msg, record):
def _process_message(self, message):
msg = Message()
settings = utils.get_settings()

if settings['store_original_message']:
self._process_save_original_message(message, msg)
msg.mailbox = self
Expand Down Expand Up @@ -386,10 +385,14 @@ def _process_message(self, message):

def _process_save_original_message(self, message, msg):
settings = utils.get_settings()
if six.PY3:
content = message.as_string().encode('ascii', 'surrogateescape')
else:
content = message.as_string()
if settings['compress_original_message']:
with NamedTemporaryFile(suffix=".eml.gz") as fp_tmp:
with gzip.GzipFile(fileobj=fp_tmp, mode="w") as fp:
fp.write(message.as_string().encode('utf-8'))
fp.write(content)
msg.eml.save(
"%s.eml.gz" % (uuid.uuid4(), ),
File(fp_tmp),
Expand All @@ -399,7 +402,7 @@ def _process_save_original_message(self, message, msg):
else:
msg.eml.save(
'%s.eml' % uuid.uuid4(),
ContentFile(message.as_string()),
ContentFile(content),
save=False
)

Expand Down
36 changes: 35 additions & 1 deletion django_mailbox/tests/test_process_email.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,40 @@ def test_message_with_utf8_attachment_header(self):
u'odpowied\u017a Burmistrza.jpg'
)

def test_message_with_utf8_surrogates(self):
"""Ensure that we properly handle UTF-8 surrogates
The problem was observed in Python 3.5. This test guards against a regression of #159.
"""

email_object = self._get_email_object(
'message_with_utf8_surrogates.eml',
)
mailbox = Mailbox.objects.create()
default_settings = utils.get_settings()
with mock.patch('django_mailbox.utils.get_settings') as get_settings:
altered = copy.deepcopy(default_settings)
altered['store_original_message'] = True

get_settings.return_value = altered

# Before the fix for #159, this call raised UnicodeEncodeError.
msg = mailbox.process_incoming_message(email_object)

self.assertEqual(
msg.subject,
u'Do czego te\u017c s\u0105 zdolni Polscy s\u0119dziowie ... '
)

self.assertEqual(
msg.attachments.count(),
0
)

with open(msg.eml.name, 'rb') as f:
self.assertEqual(f.read(),
self._get_email_as_text('message_with_utf8_surrogates.eml'))

def test_message_get_text_body(self):
message = self._get_email_object('multipart_text.eml')

Expand Down Expand Up @@ -468,4 +502,4 @@ def test_message_compressed(self):

with gzip.open(msg.eml.name, 'rb') as f:
self.assertEqual(f.read(),
self._get_email_as_text('generic_message.eml'))
self._get_email_as_text('generic_message.eml'))

0 comments on commit f0c1cc1

Please sign in to comment.