-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathRSStoFriendika.py
162 lines (137 loc) · 4.79 KB
/
RSStoFriendika.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
#!/usr/bin/env python
# Application to post RSS updates to Friendika
import sys
import urllib
import urllib2
import urlparse
import cPickle as pickle
import hashlib
import StringIO
import ConfigParser
import copy
import time
# External libraries
import feedparser
import mako.template as mako
import lxml.etree as etree
conf_path = 'RSStoFriendika.conf'
def tweet(server, message, group_allow=None):
    """Post a status update to a Friendika server.

    Sends an HTTP POST to ``server + '/api/statuses/update'`` using
    urllib2.urlopen.

    server      -- base URL of the server; the API path is appended as-is
    message     -- status text, sent as the ``status`` form field
    group_allow -- optional sequence of group ids, sent as repeated
                   ``group_allow[]`` form fields; None sends no such field

    Returns None. Any exception raised by urllib2.urlopen propagates.
    """
    url = server + '/api/statuses/update'
    fields = {'status': message}
    # With doseq=True a bare None would be encoded as the literal string
    # "None", so the field is only added when a value was actually given.
    # An empty sequence still encodes to nothing, as before.
    if group_allow is not None:
        fields['group_allow[]'] = group_allow
    response = urllib2.urlopen(url, urllib.urlencode(fields, doseq=True))
    # The reply body is not used; close the connection instead of leaking it.
    response.close()
# The server can also convert HTML to BBCode, but then the templates would
# also have to be written in HTML.
# Maybe later
# Parsed XSLT document used by makebbcode() to turn HTML into BBCode; the
# path is relative, so it is resolved against the current working directory.
htmltobbcode = etree.parse('htmltobbcode/HTMLtoBBCode.xslt')
# HTML parser instance shared by every makebbcode() call.
htmlparser = etree.HTMLParser()
def makebbcode(html):
    """Return *html* converted to BBCode, as a unicode string."""
    # Parse the markup from an in-memory file with the shared HTML parser,
    # then run the result through the module-level HTML-to-BBCode stylesheet.
    source = StringIO.StringIO(html)
    document = etree.parse(source, htmlparser)
    transformed = document.xslt(htmltobbcode)
    return unicode(transformed)
# Parse the configuration file named by conf_path.
config = ConfigParser.SafeConfigParser()
config.read(conf_path)
# Set to True by getconfig() when a value had to be asked for interactively.
config_changed = False
def getconfig(config, option, valid=''):
    """Return *option* from the 'main' section, prompting if it is missing.

    config -- parser object to read from and, if needed, update
    option -- option name; underscores are shown as spaces in the prompt
    valid  -- text appended to the prompt, e.g. a hint about accepted answers

    When the option (or the whole 'main' section) is missing, the value is
    read with raw_input(), stored in *config*, and the module-level flag
    config_changed is set to True. The stored value is then returned via
    config.get(), so it goes through the parser's normal interpolation.
    """
    global config_changed  # Ugly!
    try:
        return config.get('main', option)
    except (ConfigParser.NoSectionError, ConfigParser.NoOptionError):
        pass
    if not config.has_section('main'):
        config.add_section('main')
    answer = raw_input('%s%s: ' % (option.replace('_', ' '), valid))
    try:
        config.set('main', option, answer)
    except ValueError:
        # SafeConfigParser rejects values containing a lone '%' (think of a
        # password like "50%off"). Escape every '%' so the text is stored
        # literally and config.get() hands back exactly what was typed.
        config.set('main', option, answer.replace('%', '%%'))
    config_changed = True
    return config.get('main', option)
# Read config, prompting for anything that is missing
server = getconfig(config, 'server').rstrip('/')
username = getconfig(config, 'username')
password = getconfig(config, 'password')
feeds_path = getconfig(config, 'feeds_file')
# A bit of trickery to store a bool. ConfigParser's own boolean parsing is no
# good because it doesn't accept "y" and "n". The normalised "True"/"False"
# written back below must be recognised as well, otherwise a stored "True"
# would be read as false on the next run.
guid_answer = getconfig(config, 'always_store_guid', ' (y/N)').strip().lower()
store_guids = guid_answer.startswith('y') or guid_answer in ('true', '1', 'on')
config.set('main', 'always_store_guid', str(store_guids))

# Handle defaults like this or a DEFAULT section is written to output config file
try:
    guids_path = config.get('main', 'guids_file')
except ConfigParser.Error:
    guids_path = 'processed.dat'
try:
    feeds_updated = config.getfloat('main', 'updated')
except (ConfigParser.Error, ValueError):
    feeds_updated = 0
try:
    max_items = config.getint('main', 'max_per_feed')
except (ConfigParser.Error, ValueError):
    max_items = None  # None as a slice bound means "no limit"

if config_changed:
    reply = raw_input('Save config? (y/N): ')
    if reply.lower().startswith('y'):
        with open(conf_path, 'w') as configfile:
            config.write(configfile)
    else:
        # Throw the interactively entered values away by going back to what
        # is on disk. A shallow copy of the parser would share its section
        # dicts with the original and therefore discard nothing.
        config = ConfigParser.SafeConfigParser()
        config.read(conf_path)

# Set up basic authentication
passman = urllib2.HTTPPasswordMgrWithDefaultRealm()
passman.add_password(None, server, username, password)
authhandler = urllib2.HTTPBasicAuthHandler(passman)
opener = urllib2.build_opener(authhandler)
urllib2.install_opener(opener)

# Try to open "database" containing already processed GUIDs
# TODO: Something more efficient and robust
# NOTE: unpickling can execute arbitrary code; guids_file must only ever point
# at a file written by this script.
try:
    with open(guids_path, 'r') as guids_file:
        processed = pickle.load(guids_file)
except Exception:
    # Best effort: a missing or unreadable file just means nothing is known yet
    processed = set()

# Too many feeds lie about their dates to make date checking useful, so it is
# switched off :-( While it is off, every GUID is remembered.
check_dates = False
# Parsed templates by path, so a template is only loaded once per run
templates = {}

try:
    # Iterate over feed list
    with open(feeds_path) as feeds_file:
        for line in feeds_file:
            fields = line.split()
            if not fields or fields[0].startswith('#'):  # Skip blanks and comments
                continue
            # Expected format: <feed url> <template path> [<group id>,<group id>,...]
            if len(fields) == 2:
                feed_url, template_path = fields
                allowed_groups = []
            elif len(fields) == 3:
                feed_url, template_path, group_spec = fields
                try:
                    allowed_groups = [int(x) for x in group_spec.split(',')]
                except ValueError:
                    sys.stderr.write('Skipping feed line with invalid groups: %r\n' % line)
                    continue
            else:
                sys.stderr.write('Skipping malformed feed line: %r\n' % line)
                continue

            feed = feedparser.parse(feed_url)
            for entry in feed['entries'][:max_items]:
                try:
                    guid = entry['guid']
                except KeyError:
                    # No GUID in the feed: derive a stable one from the entry itself
                    guid = hashlib.sha256(repr(entry)).hexdigest()

                updated = None
                if check_dates:
                    try:
                        stamp = entry['updated_parsed']
                    except KeyError:
                        stamp = None
                    # The parsed date can be present but None; mktime() would choke
                    if stamp is not None:
                        updated = time.mktime(stamp)
                if (updated is not None and updated < feeds_updated) or guid in processed:
                    continue

                # Convert from HTML to BBCode which Friendika understands
                # Title is also processed to get rid of HTML entities
                for key in ('title', 'summary'):
                    try:
                        entry[key] = makebbcode('<html>%s</html>' % entry[key])
                    except KeyError:
                        pass
                try:
                    for content in entry.content:
                        content.value = makebbcode('<html>%s</html>' % content.value)
                except AttributeError:
                    pass

                try:
                    title = entry['title']
                except KeyError:
                    title = ''
                try:
                    link = entry['link']
                except KeyError:
                    link = None
                # TODO: Also check link rel="icon" etc.
                if link is None:
                    favicon = ''
                    linked_title = title
                else:
                    favicon = '://'.join(urlparse.urlparse(link)[0:2]) + '/favicon.ico'
                    linked_title = '[url=%s][img=16x16]%s[/img]%s[/url]' % (link, favicon, title)

                if template_path not in templates:
                    templates[template_path] = mako.Template(filename=template_path)
                message = templates[template_path].render_unicode(
                    entry=entry, favicon=favicon, linked_title=linked_title).encode('utf-8')
                tweet(server, message, allowed_groups)

                # Remember the entry only once it has really been posted, so a
                # failed post is retried on the next run.
                if not check_dates or updated is None or store_guids:
                    processed.add(guid)
finally:
    # Save what has been posted even if a feed or the server failed half-way;
    # otherwise the next run would post the same entries again.
    with open(guids_path, 'w') as guids_file:
        pickle.dump(processed, guids_file)

# The section is missing if the config started out empty and was not saved
if not config.has_section('main'):
    config.add_section('main')
config.set('main', 'updated', str(time.time()))
with open(conf_path, 'w') as configfile:
    config.write(configfile)