-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmanage_html.py
39 lines (31 loc) · 1.29 KB
/
manage_html.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import re
from django.utils.safestring import mark_safe
# Matches the ``javascript:`` pseudo-scheme regardless of letter case.
_JAVASCRIPT_SCHEME_RE = re.compile(r'javascript:', re.IGNORECASE)


def strip(s, all_tags=None):
    """Remove disallowed markup from the HTML fragment ``s``.

    The fragment is parsed with the legacy ``BeautifulSoup`` module
    (BeautifulSoup 3 API), which is imported lazily so that this module
    can still be imported when the parser is not installed.

    Processing steps:

    * every HTML comment is removed;
    * every tag whose name is not whitelisted is marked ``hidden`` (the
      tag's own markup is suppressed by BeautifulSoup; its children are
      left in the tree);
    * every attribute that is not whitelisted is dropped from every tag;
    * every occurrence of ``javascript:`` is deleted from the rendered
      output.

    Args:
        s: HTML markup to clean.
        all_tags: When truthy, no tag and no attribute is whitelisted.
            Otherwise the tags ``strong``, ``b``, ``a``, ``i`` and the
            attributes ``href``, ``src`` are kept.

    Returns:
        The cleaned markup decoded from UTF-8, or the literal string
        ``"Could not load BeautifulSoup"`` when the parser cannot be
        imported.
    """
    try:
        from BeautifulSoup import BeautifulSoup, Comment
    except ImportError:
        # Callers receive this message in place of the cleaned markup.
        return "Could not load BeautifulSoup"

    if all_tags:
        valid_tags = ()
        valid_attrs = ()
    else:
        valid_tags = ('strong', 'b', 'a', 'i')
        valid_attrs = ('href', 'src')

    soup = BeautifulSoup(s)

    # The loop variable must not be named ``Comment``: that would shadow
    # the class used by the filter and leave ``comment`` undefined.
    for comment in soup.findAll(
            text=lambda text: isinstance(text, Comment)):
        comment.extract()

    for tag in soup.findAll(True):
        if tag.name not in valid_tags:
            tag.hidden = True
        # Attributes are filtered on every tag, including hidden ones.
        tag.attrs = [(attr, val) for attr, val in tag.attrs
                     if attr in valid_attrs]

    rendered = soup.renderContents().decode('utf8')

    # Repeat until stable: a single pass turns "javajavascript:script:"
    # back into "javascript:". This is still a best-effort filter; it does
    # not catch entity-encoded or whitespace-obfuscated schemes.
    while True:
        cleaned = _JAVASCRIPT_SCHEME_RE.sub('', rendered)
        if cleaned == rendered:
            return cleaned
        rendered = cleaned
# A bare http(s) URL that starts the string or follows whitespace. The URL
# ends before whitespace, end of string, a comma, or a colon that is itself
# followed by whitespace/end of string (so "host:8080" is kept intact while
# a trailing "see http://x.com: ..." colon is left outside the link).
_URL_RE = re.compile(
    r'(?<!\S)http(s?://[\w.:/]+?)(?=\s|$|,|:(?:\s|$))', re.IGNORECASE)

# An @mention that starts the string or follows whitespace.
_MENTION_RE = re.compile(r'(?<!\S)@(\w+)')


def convert_links(s):
    """Turn bare URLs and @mentions in ``s`` into HTML anchors.

    NOTE: ``s`` must already be HTML-escaped. The result is passed to
    ``mark_safe``, so any markup present in ``s`` is emitted verbatim.

    Two rewrites are applied, in this order:

    * ``http://...`` / ``https://...`` preceded by whitespace or the
      start of the string becomes ``<a href="URL">URL</a>``. Requiring
      that leading boundary keeps URLs that are already inside markup
      (for example an existing ``href="..."``) from being wrapped again.
      The scheme is matched case-insensitively and written back as
      lowercase ``http``.
    * ``@name`` preceded by whitespace or the start of the string becomes
      a link to ``http://twitter.com/name/``.

    Only word characters, ``.``, ``:`` and ``/`` are recognised inside a
    URL, so a URL followed directly by any other character (other than a
    comma, or a colon before whitespace) is left unlinked.

    Args:
        s: Escaped text to scan.

    Returns:
        The rewritten text wrapped by ``mark_safe``.
    """
    s = _URL_RE.sub(r'<a href="http\1">http\1</a>', s)
    s = _MENTION_RE.sub(r'<a href="http://twitter.com/\1/">@\1</a>', s)
    return mark_safe(s)