home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Chip 2011 November
/
CHIP_2011_11.iso
/
Programy
/
Narzedzia
/
Calibre
/
calibre-0.8.18.msi
/
file_280
/
times_online.recipe
< prev
next >
Wrap
Text File
|
2011-09-09
|
5KB
|
107 lines
__license__ = 'GPL v3'
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
www.thetimes.co.uk
'''
import urllib
from calibre.web.feeds.news import BasicNewsRecipe
class TimesOnline(BasicNewsRecipe):
    """Calibre news recipe for The Times (www.thetimes.co.uk).

    The recipe opens the timesplus.co.uk login page, submits the user's
    credentials when both are set, and builds its article index by scraping
    the ``?view=list`` page of every section listed in ``feeds`` (see
    ``parse_index``).
    """

    # --- Publication metadata -------------------------------------------
    title = 'The Times UK'
    __author__ = 'Darko Miletic'
    description = 'news from United Kingdom and World'
    language = 'en_GB'
    publisher = 'Times Newspapers Ltd'
    category = 'news, politics, UK'
    publication_type = 'newspaper'
    masthead_url = 'http://www.thetimes.co.uk/tto/public/img/the_times_460.gif'

    # --- Download behaviour ---------------------------------------------
    # NOTE(review): units/semantics of these knobs are defined by
    # BasicNewsRecipe, not by this file -- see the calibre recipe API docs.
    oldest_article = 3
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'
    delay = 1
    needs_subscription = True

    # --- Site URLs ------------------------------------------------------
    # INDEX is prepended to the hrefs scraped in parse_index(); PREFIX is
    # the base of every section URL in ``feeds``.
    INDEX = 'http://www.thetimes.co.uk'
    PREFIX = u'http://www.thetimes.co.uk/tto/'

    # Built from adjacent literals so the CSS text (including the leading
    # and trailing newline) stays exactly what the original triple-quoted
    # string contained.
    extra_css = (
        '\n'
        '.f-ha{font-size: xx-large; font-weight: bold}\n'
        '.f-author{font-family: Arial,Helvetica,sans-serif}\n'
        '.caption{font-size: small}\n'
        'body{font-family: Georgia,"Times New Roman",Times,serif}\n'
    )

    # Reuses the metadata attributes defined above, so it must stay below
    # them in the class body.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    def get_browser(self):
        """Return a browser that has visited the login page.

        The login landing page is always opened. When both ``self.username``
        and ``self.password`` are set, the credentials are additionally
        submitted, form-encoded, to the authentication URL.
        """
        # NOTE(review): invoked without ``self``; this only works if the base
        # class exposes get_browser as a classmethod/staticmethod -- confirm
        # against the calibre version this recipe targets.
        br = BasicNewsRecipe.get_browser()
        br.open('http://www.timesplus.co.uk/tto/news/?login=false&url=http://www.thetimes.co.uk/tto/news/?lightbox=false')
        if self.username is not None and self.password is not None:
            data = urllib.urlencode({
                'userName': self.username,
                'password': self.password,
                'keepMeLoggedIn': 'false',
            })
            br.open('https://www.timesplus.co.uk/iam/app/authenticate', data)
        return br

    # --- HTML clean-up rules --------------------------------------------
    remove_tags = [
        dict(name=['object', 'link', 'iframe', 'base', 'meta']),
        dict(attrs={'class': 'tto-counter'}),
    ]
    remove_attributes = ['lang']
    keep_only_tags = [
        dict(attrs={'class': 'heading'}),
        dict(attrs={'class': 'f-author'}),
        dict(attrs={'id': 'bodycopy'}),
    ]

    # (section title, section list-page URL) pairs consumed by parse_index().
    feeds = [
        (u'UK News', PREFIX + u'news/uk/?view=list'),
        (u'World', PREFIX + u'news/world/?view=list'),
        (u'Politics', PREFIX + u'news/politics/?view=list'),
        (u'Health', PREFIX + u'health/news/?view=list'),
        (u'Education', PREFIX + u'education/?view=list'),
        (u'Technology', PREFIX + u'technology/?view=list'),
        (u'Science', PREFIX + u'science/?view=list'),
        (u'Environment', PREFIX + u'environment/?view=list'),
        (u'Faith', PREFIX + u'faith/?view=list'),
        (u'Opinion', PREFIX + u'opinion/?view=list'),
        (u'Sport', PREFIX + u'sport/?view=list'),
        (u'Business', PREFIX + u'business/?view=list'),
        (u'Money', PREFIX + u'money/?view=list'),
        (u'Life', PREFIX + u'life/?view=list'),
        (u'Arts', PREFIX + u'arts/?view=list'),
    ]

    def preprocess_html(self, soup):
        """Drop every inline ``style`` attribute, then run adeify_images.

        Returns whatever ``self.adeify_images(soup)`` returns.
        """
        for item in soup.findAll(style=True):
            del item['style']
        return self.adeify_images(soup)

    def parse_index(self):
        """Build the article index by scraping each section's list page.

        For every ``(title, url)`` pair from ``self.get_feeds()`` the page is
        fetched and each ``<td class="title">`` cell contributes one article,
        taken from the first ``<a>`` inside the cell.

        Returns:
            A list of ``(feed title, articles)`` tuples. Each article is a
            dict with the keys ``title``, ``date``, ``url`` and
            ``description``; ``date`` and ``description`` are always ``''``.
        """
        totalfeeds = []
        for feedtitle, feedurl in self.get_feeds():
            self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
            soup = self.index_to_soup(feedurl)
            articles = []
            for item in soup.findAll('td', attrs={'class':'title'}):
                atag = item.find('a')
                # A title cell without a link (or a link without an href)
                # cannot yield an article; skip it instead of letting one
                # malformed row abort the whole download.
                href = atag.get('href') if atag is not None else None
                if not href:
                    continue
                articles.append({
                    'title': self.tag_to_string(atag),
                    'date': '',
                    'url': self.INDEX + href,
                    'description': '',
                })
            totalfeeds.append((feedtitle, articles))
        return totalfeeds