From f0c1cc19f5c714fd07f5aa7478d2f3d32bce5ff8 Mon Sep 17 00:00:00 2001 From: Adam Dobrawy Date: Wed, 3 Jan 2018 03:36:09 +0100 Subject: [PATCH] Ensure UTF-8 surrogates escaped on save - fix #159 --- django_mailbox/models.py | 9 ++++-- django_mailbox/tests/test_process_email.py | 36 +++++++++++++++++++++- 2 files changed, 41 insertions(+), 4 deletions(-) diff --git a/django_mailbox/models.py b/django_mailbox/models.py index 6ca9884e..6e183dfb 100644 --- a/django_mailbox/models.py +++ b/django_mailbox/models.py @@ -346,7 +346,6 @@ def _get_dehydrated_message(self, msg, record): def _process_message(self, message): msg = Message() settings = utils.get_settings() - if settings['store_original_message']: self._process_save_original_message(message, msg) msg.mailbox = self @@ -386,10 +385,14 @@ def _process_message(self, message): def _process_save_original_message(self, message, msg): settings = utils.get_settings() + if six.PY3: + content = message.as_string().encode('ascii', 'surrogateescape') + else: + content = message.as_string() if settings['compress_original_message']: with NamedTemporaryFile(suffix=".eml.gz") as fp_tmp: with gzip.GzipFile(fileobj=fp_tmp, mode="w") as fp: - fp.write(message.as_string().encode('utf-8')) + fp.write(content) msg.eml.save( "%s.eml.gz" % (uuid.uuid4(), ), File(fp_tmp), @@ -399,7 +402,7 @@ def _process_save_original_message(self, message, msg): else: msg.eml.save( '%s.eml' % uuid.uuid4(), - ContentFile(message.as_string()), + ContentFile(content), save=False ) diff --git a/django_mailbox/tests/test_process_email.py b/django_mailbox/tests/test_process_email.py index a128e2c6..b71bc789 100644 --- a/django_mailbox/tests/test_process_email.py +++ b/django_mailbox/tests/test_process_email.py @@ -147,6 +147,40 @@ def test_message_with_utf8_attachment_header(self): u'odpowied\u017a Burmistrza.jpg' ) + def test_message_with_utf8_surrogates(self): + """Ensure that we properly handle UTF-8 surrogates + + The problem was observed in Python 3.5. 
This test guards against a regression of #159. + """ + + email_object = self._get_email_object( + 'message_with_utf8_surrogates.eml', + ) + mailbox = Mailbox.objects.create() + default_settings = utils.get_settings() + with mock.patch('django_mailbox.utils.get_settings') as get_settings: + altered = copy.deepcopy(default_settings) + altered['store_original_message'] = True + + get_settings.return_value = altered + + # Before the fix for #159, this call raised UnicodeEncodeError. + msg = mailbox.process_incoming_message(email_object) + + self.assertEqual( + msg.subject, + u'Do czego te\u017c s\u0105 zdolni Polscy s\u0119dziowie ... ' + ) + + self.assertEqual( + msg.attachments.count(), + 0 + ) + + with open(msg.eml.name, 'rb') as f: + self.assertEqual(f.read(), + self._get_email_as_text('message_with_utf8_surrogates.eml')) + def test_message_get_text_body(self): message = self._get_email_object('multipart_text.eml') @@ -468,4 +502,4 @@ def test_message_compressed(self): with gzip.open(msg.eml.name, 'rb') as f: self.assertEqual(f.read(), - self._get_email_as_text('generic_message.eml')) \ No newline at end of file + self._get_email_as_text('generic_message.eml'))