home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / straitstimes.recipe < prev    next >
Text File  |  2011-09-09  |  3KB  |  62 lines

  1.  
  2. __license__   = 'GPL v3'
  3. __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
  4. '''
  5. www.straitstimes.com
  6. '''
  7.  
  8. import re
  9. from calibre.web.feeds.recipes import BasicNewsRecipe
  10.  
  11. class StraitsTimes(BasicNewsRecipe):
  12.     title                  = 'The Straits Times'
  13.     __author__             = 'Darko Miletic'
  14.     description            = 'Singapore newspaper'
  15.     oldest_article         = 2
  16.     max_articles_per_feed  = 100
  17.     no_stylesheets         = True
  18.     use_embedded_content   = False
  19.     encoding               = 'cp1252'
  20.     publisher              = 'Singapore Press Holdings Ltd.'
  21.     category               = 'news, politics, singapore, asia'
  22.     language               = 'en_SG'
  23.     extra_css              = ' .top_headline{font-size: x-large; font-weight: bold} '
  24.  
  25.     conversion_options = {
  26.                              'comments'  : description
  27.                             ,'tags'      : category
  28.                             ,'language'  : language
  29.                             ,'publisher' : publisher
  30.                          }
  31.  
  32.     preprocess_regexps = [
  33.                            (re.compile(
  34.                             r'<meta name="description" content="[^"]+"\s*/?>',
  35.                             re.IGNORECASE|re.DOTALL),
  36.                             lambda m:''),
  37.                            (re.compile(r'<!--.+?-->', re.IGNORECASE|re.DOTALL),
  38.                                lambda m: ''),
  39.                          ]
  40.     remove_tags = [
  41.                      dict(name=['object','link','map'])
  42.                     ,dict(name='div',attrs={'align':'left'})
  43.                   ]
  44.  
  45.     keep_only_tags = [dict(name='div', attrs={'class':'stleft'})]
  46.     remove_tags_after=dict(name='div',attrs={'class':'hr_thin'})
  47.  
  48.     feeds = [
  49.                (u'Singapore'       , u'http://www.straitstimes.com/STI/STIFILES/rss/break_singapore.xml' )
  50.               ,(u'SE Asia'         , u'http://www.straitstimes.com/STI/STIFILES/rss/break_sea.xml'       )
  51.               ,(u'Money'           , u'http://www.straitstimes.com/STI/STIFILES/rss/break_money.xml'     )
  52.               ,(u'Sport'           , u'http://www.straitstimes.com/STI/STIFILES/rss/break_sport.xml'     )
  53.               ,(u'World'           , u'http://www.straitstimes.com/STI/STIFILES/rss/break_world.xml'     )
  54.               ,(u'Tech & Science'  , u'http://www.straitstimes.com/STI/STIFILES/rss/break_tech.xml'      )
  55.               ,(u'Lifestyle'       , u'http://www.straitstimes.com/STI/STIFILES/rss/break_lifestyle.xml' )
  56.             ]
  57.  
  58.     def preprocess_html(self, soup):
  59.         for item in soup.findAll(style=True):
  60.             del item['style']
  61.         return soup
  62.