-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy patharabian_business.recipe
86 lines (78 loc) · 5.88 KB
/
arabian_business.recipe
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
www.arabianbusiness.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Arabian_Business(BasicNewsRecipe):
title = 'Arabian Business'
__author__ = 'Darko Miletic'
description = 'Comprehensive Guide to Middle East Business & Gulf Industry News including,Banking & Finance,Construction,Energy,Media & Marketing,Real Estate,Transportation,Travel,Technology,Politics,Healthcare,Lifestyle,Jobs & UAE guide.Top Gulf & Dubai Business News.'
publisher = 'Arabian Business Publishing Ltd.'
category = 'ArabianBusiness.com,Arab Business News,Middle East Business News,Middle East Business,Arab Media News,Industry Events,Middle East Industry News,Arab Business Industry,Dubai Business News,Financial News,UAE Business News,Middle East Press Releases,Gulf News,Arab News,GCC Business News,Banking Finance,Media Marketing,Construction,Oil Gas,Retail,Transportation,Travel Hospitality,Photos,Videos,Life Style,Fashion,United Arab Emirates,UAE,Dubai,Sharjah,Abu Dhabi,Qatar,KSA,Saudi Arabia,Bahrain,Kuwait,Oman,Europe,South Asia,America,Asia,news'
oldest_article = 2
max_articles_per_feed = 200
no_stylesheets = True
encoding = 'utf8'
use_embedded_content = False
language = 'en'
remove_empty_feeds = True
publication_type = 'newsportal'
masthead_url = 'http://www.arabianbusiness.com/skins/ab.main/gfx/arabianbusiness_logo_sm.gif'
extra_css = """
body{font-family: Georgia,serif }
img{margin-bottom: 0.4em; margin-top: 0.4em; display:block}
.byline,.dateline{font-size: small; display: inline; font-weight: bold}
ul{list-style: none outside none;}
"""
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
remove_tags_before=dict(attrs={'id':'article-title'})
remove_tags = [
dict(name=['meta','link','base','iframe','embed','object'])
,dict(attrs={'class':'printfooter'})
]
remove_attributes=['lang']
feeds = [
(u'Africa' , u'http://www.arabianbusiness.com/world/Africa/?service=rss' )
,(u'Americas' , u'http://www.arabianbusiness.com/world/americas/?service=rss' )
,(u'Asia Pacific' , u'http://www.arabianbusiness.com/world/asia-pacific/?service=rss' )
,(u'Europe' , u'http://www.arabianbusiness.com/world/europe/?service=rss' )
,(u'Middle East' , u'http://www.arabianbusiness.com/world/middle-east/?service=rss' )
,(u'South Asia' , u'http://www.arabianbusiness.com/world/south-asia/?service=rss' )
,(u'Banking & Finance', u'http://www.arabianbusiness.com/industries/banking-finance/?service=rss' )
,(u'Construction' , u'http://www.arabianbusiness.com/industries/construction/?service=rss' )
,(u'Education' , u'http://www.arabianbusiness.com/industries/education/?service=rss' )
,(u'Energy' , u'http://www.arabianbusiness.com/industries/energy/?service=rss' )
,(u'Healthcare' , u'http://www.arabianbusiness.com/industries/healthcare/?service=rss' )
,(u'Media' , u'http://www.arabianbusiness.com/industries/media/?service=rss' )
,(u'Real Estate' , u'http://www.arabianbusiness.com/industries/real-estate/?service=rss' )
,(u'Retail' , u'http://www.arabianbusiness.com/industries/retail/?service=rss' )
,(u'Technology' , u'http://www.arabianbusiness.com/industries/technology/?service=rss' )
,(u'Transport' , u'http://www.arabianbusiness.com/industries/transport/?service=rss' )
,(u'Travel' , u'http://www.arabianbusiness.com/industries/travel-hospitality/?service=rss')
,(u'Equities' , u'http://www.arabianbusiness.com/markets/equities/?service=rss' )
,(u'Commodities' , u'http://www.arabianbusiness.com/markets/commodities/?service=rss' )
,(u'Currencies' , u'http://www.arabianbusiness.com/markets/currencies/?service=rss' )
,(u'Market Data' , u'http://www.arabianbusiness.com/markets/market-data/?service=rss' )
,(u'Comment' , u'http://www.arabianbusiness.com/opinion/comment/?service=rss' )
,(u'Think Tank' , u'http://www.arabianbusiness.com/opinion/think-tank/?service=rss' )
,(u'Arts' , u'http://www.arabianbusiness.com/lifestyle/arts/?service=rss' )
,(u'Cars' , u'http://www.arabianbusiness.com/lifestyle/cars/?service=rss' )
,(u'Food' , u'http://www.arabianbusiness.com/lifestyle/food/?service=rss' )
,(u'Sport' , u'http://www.arabianbusiness.com/lifestyle/sport/?service=rss' )
]
def print_version(self, url):
return url + '?service=printer&page='
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
for alink in soup.findAll('a'):
if alink.string is not None:
tstr = alink.string
alink.replaceWith(tstr)
return soup