home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / the_oz.recipe < prev    next >
Text File  |  2011-09-09  |  5KB  |  90 lines

  1. #!/usr/bin/env  python
  2. __license__   = 'GPL v3'
  3. __copyright__ = '2009, Matthew Briggs'
  4. __docformat__ = 'restructuredtext en'
  5.  
  6. '''
  7. http://www.theaustralian.news.com.au/
  8. '''
  9.  
  10. from calibre.web.feeds.news import BasicNewsRecipe
  11.  
  12. class DailyTelegraph(BasicNewsRecipe):
  13.     title          = u'The Australian'
  14.     __author__     = u'Matthew Briggs and Sujata Raman'
  15.     description    = u'National broadsheet newspaper from down under - colloquially known as The Oz'
  16.     language = 'en_AU'
  17.  
  18.     oldest_article = 2
  19.     max_articles_per_feed = 30
  20.     remove_javascript      = True
  21.     no_stylesheets         = True
  22.     encoding               = 'utf8'
  23.  
  24.     html2lrf_options = [
  25.                           '--comment'       , description
  26.                         , '--category'      , 'news, Australia'
  27.                         , '--publisher'     , title
  28.                         ]
  29.  
  30.     keep_only_tags = [dict(name='div', attrs={'id': 'story'})]
  31.  
  32.     #remove_tags = [dict(name=['object','link'])]
  33.     remove_tags = [dict(name ='div', attrs = {'class': 'story-info'}),
  34.                           dict(name ='div', attrs = {'class': 'story-header-tools'}),
  35.                           dict(name ='div', attrs = {'class': 'story-sidebar'}),
  36.                           dict(name ='div', attrs = {'class': 'story-footer'}),
  37.                           dict(name ='div', attrs = {'id': 'comments'}),
  38.                           dict(name ='div', attrs = {'class': 'story-extras story-extras-2'}),
  39.                           dict(name ='div', attrs = {'class': 'group item-count-1 story-related'})
  40.                           ]
  41.  
  42.     extra_css   = '''
  43.                     h1{font-family :Georgia,"Times New Roman",Times,serif; font-size:large; }
  44.                     #article{font-family :Georgia,"Times New Roman",Times,serif; font-size: x-small;}
  45.                     .module-subheader{font-family :Tahoma,Geneva,Arial,Helvetica,sans-serif; color:#666666; font-size: xx-small;}
  46.                     .intro{ font-family:Trebuchet MS,Trebuchet,Helvetica,sans-serif;font-size: x-small; }
  47.                     .article-source{font-family:Trebuchet MS,Trebuchet,Helvetica,sans-serif; color:#666666; font-size: xx-small;}
  48.                     .caption{font-family:Trebuchet MS,Trebuchet,Helvetica,sans-serif; font-size: xx-small;}
  49.                 '''
  50.  
  51.     feeds = [       (u'News', u'http://feeds.news.com.au/public/rss/2.0/aus_news_807.xml'),
  52.                     (u'Opinion', u'http://feeds.news.com.au/public/rss/2.0/aus_opinion_58.xml'),
  53.                     (u'The Nation', u'http://feeds.news.com.au/public/rss/2.0/aus_the_nation_62.xml'),
  54.                     (u'World News', u'http://feeds.news.com.au/public/rss/2.0/aus_world_808.xml'),
  55.                     (u'US Election', u'http://feeds.news.com.au/public/rss/2.0/aus_uselection_687.xml'),
  56.                     (u'Climate', u'http://feeds.news.com.au/public/rss/2.0/aus_climate_809.xml'),
  57.                     (u'Media', u'http://feeds.news.com.au/public/rss/2.0/aus_media_57.xml'),
  58.                     (u'IT', u'http://feeds.news.com.au/public/rss/2.0/ausit_itnews_topstories_367.xml'),
  59.                     (u'Exec Tech', u'http://feeds.news.com.au/public/rss/2.0/ausit_exec_topstories_385.xml'),
  60.                     (u'Higher Education', u'http://feeds.news.com.au/public/rss/2.0/aus_higher_education_56.xml'),
  61.                     (u'Arts', u'http://feeds.news.com.au/public/rss/2.0/aus_arts_51.xml'),
  62.                     (u'Travel', u'http://feeds.news.com.au/public/rss/2.0/aus_travel_and_indulgence_63.xml'),
  63.                     (u'Property', u'http://feeds.news.com.au/public/rss/2.0/aus_property_59.xml'),
  64.                     (u'Sport', u'http://feeds.news.com.au/public/rss/2.0/aus_sport_61.xml'),
  65.                     (u'Business', u'http://feeds.news.com.au/public/rss/2.0/aus_business_811.xml'),
  66.                     (u'Aviation', u'http://feeds.news.com.au/public/rss/2.0/aus_business_aviation_706.xml'),
  67.                     (u'Commercial Property', u'http://feeds.news.com.au/public/rss/2.0/aus_business_commercial_property_708.xml'),
  68.                     (u'Mining', u'http://feeds.news.com.au/public/rss/2.0/aus_business_mining_704.xml')]
  69.  
  70.     def get_article_url(self, article):
  71.         return article.id
  72.  
  73.         #br = self.get_browser()
  74.         #br.open(article.link).read()
  75.         #print br.geturl()
  76.  
  77.         #return br.geturl()
  78.  
  79.     def get_cover_url(self):
  80.  
  81.         href =  'http://www.theaustralian.news.com.au/'
  82.  
  83.         soup = self.index_to_soup(href)
  84.         img = soup.find('img',alt ="AUS HP promo digital2")
  85.         print img
  86.         if img :
  87.            cover_url = img['src']
  88.  
  89.         return cover_url
  90.