home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Chip 2011 November
/
CHIP_2011_11.iso
/
Programy
/
Narzedzia
/
Calibre
/
calibre-0.8.18.msi
/
file_280
/
irish_times.recipe
< prev
next >
Wrap
Text File
|
2011-09-09
|
3KB
|
66 lines
__license__ = 'GPL v3'
__copyright__ = "2008, Derry FitzGerald. 2009 Modified by Ray Kinsella and David O'Callaghan, 2011 Modified by Phil Burns"
'''
irishtimes.com
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class IrishTimes(BasicNewsRecipe):
title = u'The Irish Times'
encoding = 'ISO-8859-15'
__author__ = "Derry FitzGerald, Ray Kinsella, David O'Callaghan and Phil Burns"
language = 'en_IE'
timefmt = ' (%A, %B %d, %Y)'
oldest_article = 1.0
max_articles_per_feed = 100
no_stylesheets = True
simultaneous_downloads= 5
r = re.compile('.*(?P<url>http:\/\/(www.irishtimes.com)|(rss.feedsportal.com\/c)\/.*\.html?).*')
remove_tags = [dict(name='div', attrs={'class':'footer'})]
extra_css = 'p, div { margin: 0pt; border: 0pt; text-indent: 0.5em } .headline {font-size: large;} \n .fact { padding-top: 10pt }'
feeds = [
('Frontpage', 'http://www.irishtimes.com/feeds/rss/newspaper/index.rss'),
('Ireland', 'http://www.irishtimes.com/feeds/rss/newspaper/ireland.rss'),
('World', 'http://www.irishtimes.com/feeds/rss/newspaper/world.rss'),
('Finance', 'http://www.irishtimes.com/feeds/rss/newspaper/finance.rss'),
('Features', 'http://www.irishtimes.com/feeds/rss/newspaper/features.rss'),
('Sport', 'http://www.irishtimes.com/feeds/rss/newspaper/sport.rss'),
('Opinion', 'http://www.irishtimes.com/feeds/rss/newspaper/opinion.rss'),
('Letters', 'http://www.irishtimes.com/feeds/rss/newspaper/letters.rss'),
('Magazine', 'http://www.irishtimes.com/feeds/rss/newspaper/magazine.rss'),
('Health', 'http://www.irishtimes.com/feeds/rss/newspaper/health.rss'),
('Education & Parenting', 'http://www.irishtimes.com/feeds/rss/newspaper/education.rss'),
('Motors', 'http://www.irishtimes.com/feeds/rss/newspaper/motors.rss'),
('An Teanga Bheo', 'http://www.irishtimes.com/feeds/rss/newspaper/anteangabheo.rss'),
('Commercial Property', 'http://www.irishtimes.com/feeds/rss/newspaper/commercialproperty.rss'),
('Science Today', 'http://www.irishtimes.com/feeds/rss/newspaper/sciencetoday.rss'),
('Property', 'http://www.irishtimes.com/feeds/rss/newspaper/property.rss'),
('The Tickets', 'http://www.irishtimes.com/feeds/rss/newspaper/theticket.rss'),
('Weekend', 'http://www.irishtimes.com/feeds/rss/newspaper/weekend.rss'),
('News features', 'http://www.irishtimes.com/feeds/rss/newspaper/newsfeatures.rss'),
('Obituaries', 'http://www.irishtimes.com/feeds/rss/newspaper/obituaries.rss'),
]
def print_version(self, url):
if url.count('rss.feedsportal.com'):
#u = url.replace('0Bhtml/story01.htm','_pf0Bhtml/story01.htm')
u = url.find('irishtimes')
u = 'http://www.irishtimes.com' + url[u + 12:]
u = u.replace('0C', '/')
u = u.replace('A', '')
u = u.replace('0Bhtml/story01.htm', '_pf.html')
else:
u = url.replace('.html','_pf.html')
return u
def get_article_url(self, article):
return article.link