Chip 2011 November

home *** CD-ROM | disk | FTP | other *** search

/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / the_oz.recipe < prev next >

Wrap

Text File | 2011-09-09 | 5KB | 90 lines

#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2009, Matthew Briggs'
__docformat__ = 'restructuredtext en'

'''
http://www.theaustralian.news.com.au/
'''

from calibre.web.feeds.news import BasicNewsRecipe


class DailyTelegraph(BasicNewsRecipe):
    """Calibre news recipe for The Australian.

    Downloads the RSS feeds listed in ``feeds``, keeps only the ``div#story``
    element of each article page and strips the story tool/sidebar/comment
    blocks listed in ``remove_tags``.

    NOTE(review): the class is named ``DailyTelegraph`` although the recipe
    title is 'The Australian'; the name is kept unchanged so that anything
    referring to the class by name keeps working.
    """

    title = u'The Australian'
    __author__ = u'Matthew Briggs and Sujata Raman'
    description = u'National broadsheet newspaper from down under - colloquially known as The Oz'
    language = 'en_AU'

    oldest_article = 2
    max_articles_per_feed = 30
    remove_javascript = True
    no_stylesheets = True
    encoding = 'utf8'

    html2lrf_options = [
        '--comment', description,
        '--category', 'news, Australia',
        '--publisher', title,
    ]

    # Only the main story container is kept from each article page.
    keep_only_tags = [dict(name='div', attrs={'id': 'story'})]

    # Blocks inside the story container that are dropped from the output.
    remove_tags = [
        dict(name='div', attrs={'class': 'story-info'}),
        dict(name='div', attrs={'class': 'story-header-tools'}),
        dict(name='div', attrs={'class': 'story-sidebar'}),
        dict(name='div', attrs={'class': 'story-footer'}),
        dict(name='div', attrs={'id': 'comments'}),
        dict(name='div', attrs={'class': 'story-extras story-extras-2'}),
        dict(name='div', attrs={'class': 'group item-count-1 story-related'}),
    ]

    extra_css = '''
        h1{font-family :Georgia,"Times New Roman",Times,serif; font-size:large; }
        #article{font-family :Georgia,"Times New Roman",Times,serif; font-size: x-small;}
        .module-subheader{font-family :Tahoma,Geneva,Arial,Helvetica,sans-serif; color:#666666; font-size: xx-small;}
        .intro{ font-family:Trebuchet MS,Trebuchet,Helvetica,sans-serif;font-size: x-small; }
        .article-source{font-family:Trebuchet MS,Trebuchet,Helvetica,sans-serif; color:#666666; font-size: xx-small;}
        .caption{font-family:Trebuchet MS,Trebuchet,Helvetica,sans-serif; font-size: xx-small;}
    '''

    # (section title, RSS feed URL) pairs.
    feeds = [
        (u'News', u'http://feeds.news.com.au/public/rss/2.0/aus_news_807.xml'),
        (u'Opinion', u'http://feeds.news.com.au/public/rss/2.0/aus_opinion_58.xml'),
        (u'The Nation', u'http://feeds.news.com.au/public/rss/2.0/aus_the_nation_62.xml'),
        (u'World News', u'http://feeds.news.com.au/public/rss/2.0/aus_world_808.xml'),
        (u'US Election', u'http://feeds.news.com.au/public/rss/2.0/aus_uselection_687.xml'),
        (u'Climate', u'http://feeds.news.com.au/public/rss/2.0/aus_climate_809.xml'),
        (u'Media', u'http://feeds.news.com.au/public/rss/2.0/aus_media_57.xml'),
        (u'IT', u'http://feeds.news.com.au/public/rss/2.0/ausit_itnews_topstories_367.xml'),
        (u'Exec Tech', u'http://feeds.news.com.au/public/rss/2.0/ausit_exec_topstories_385.xml'),
        (u'Higher Education', u'http://feeds.news.com.au/public/rss/2.0/aus_higher_education_56.xml'),
        (u'Arts', u'http://feeds.news.com.au/public/rss/2.0/aus_arts_51.xml'),
        (u'Travel', u'http://feeds.news.com.au/public/rss/2.0/aus_travel_and_indulgence_63.xml'),
        (u'Property', u'http://feeds.news.com.au/public/rss/2.0/aus_property_59.xml'),
        (u'Sport', u'http://feeds.news.com.au/public/rss/2.0/aus_sport_61.xml'),
        (u'Business', u'http://feeds.news.com.au/public/rss/2.0/aus_business_811.xml'),
        (u'Aviation', u'http://feeds.news.com.au/public/rss/2.0/aus_business_aviation_706.xml'),
        (u'Commercial Property', u'http://feeds.news.com.au/public/rss/2.0/aus_business_commercial_property_708.xml'),
        (u'Mining', u'http://feeds.news.com.au/public/rss/2.0/aus_business_mining_704.xml'),
    ]

    def get_article_url(self, article):
        """Return the URL to download for ``article``.

        Uses the feed entry's ``id`` attribute rather than its link.
        NOTE(review): presumably the entry id holds the canonical article
        URL for these feeds -- confirm against a live feed.
        """
        return article.id

    def get_cover_url(self):
        """Return the cover image URL scraped from the homepage, or None.

        Looks for the ``<img>`` whose ``alt`` text is "AUS HP promo digital2".
        Returns None when that image (or its ``src``) is absent instead of
        raising, so a homepage redesign does not abort the whole download.
        """
        href = 'http://www.theaustralian.news.com.au/'
        soup = self.index_to_soup(href)
        img = soup.find('img', alt="AUS HP promo digital2")
        if img is None:
            # Previously this path fell through to ``return cover_url`` with
            # the local never assigned, raising UnboundLocalError.
            return None
        return img.get('src')