From c9e1e6ef6fcabf061323d8a70177793cbaecb635 Mon Sep 17 00:00:00 2001 From: Pietro Martino Lugato Date: Wed, 25 Sep 2024 14:54:17 -0400 Subject: [PATCH] fix mailbox service to handle bad chars --- A2rchi/utils/mailbox.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/A2rchi/utils/mailbox.py b/A2rchi/utils/mailbox.py index 6462a48..a64854b 100644 --- a/A2rchi/utils/mailbox.py +++ b/A2rchi/utils/mailbox.py @@ -5,6 +5,7 @@ import email import imaplib import os +import re ### DEFINITIONS # this constant defines an offset into the message description @@ -184,11 +185,33 @@ def _get_fields(self, msg): body, body_html = self._get_email_body(msg) description = body if body else body_html + description = self._clear_text(description) print("BODY:") print(description) return sender, cc, subject, description + def _clear_text(self,string): + emoj = re.compile("[" + u"\U0001F600-\U0001F64F" # emoticons + u"\U0001F300-\U0001F5FF" # symbols & pictographs + u"\U0001F680-\U0001F6FF" # transport & map symbols + u"\U0001F1E0-\U0001F1FF" # flags (iOS) + u"\U00002500-\U00002BEF" # box drawing through misc symbols and arrows (not CJK) + u"\U00002702-\U000027B0" # dingbats + u"\U000024C2-\U0001F251" # NOTE(review): very wide range, also matches CJK, kana and Hangul text; confirm intended + u"\U0001f926-\U0001f937" + u"\U00010000-\U0010ffff" # every supplementary-plane code point + u"\u2640-\u2642" + u"\u2600-\u2B55" + u"\u200d" # zero width joiner + u"\u23cf" + u"\u23e9" + u"\u231a" + u"\ufe0f" # variation selector-16 + u"\u3030" # wavy dash + "]+", re.UNICODE) + return re.sub(emoj, '', string) def _connect(self): """