home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / theeconomictimes_india.recipe < prev    next >
Encoding:
Text File  |  2011-09-09  |  2.0 KB  |  55 lines

  1. __license__   = 'GPL v3'
  2. __copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
  3. '''
  4. economictimes.indiatimes.com
  5. '''
  6.  
  7.  
  8. from calibre.web.feeds.news import BasicNewsRecipe
  9.  
  class TheEconomicTimes(BasicNewsRecipe):
      """Recipe that downloads 'The Economic Times India' from its RSS feed.

      Articles come from the single feed listed in ``feeds``; each article URL
      is taken from the feed entry's ``guid`` (see ``get_article_url``) and
      rewritten to the ``m.economictimes.com/PDAET`` rendition (see
      ``print_version``) before download.
      """

      # --- Recipe metadata -------------------------------------------------
      title                  = 'The Economic Times India'
      __author__             = 'Darko Miletic'
      description            = 'Financial news from India'
      publisher              = 'economictimes.indiatimes.com'
      category               = 'news, finances, politics, India'
      language               = 'en_IN'
      publication_type       = 'newspaper'
      masthead_url           = 'http://economictimes.indiatimes.com/photo/2676871.cms'

      # --- Download settings (semantics are defined by BasicNewsRecipe) ----
      oldest_article         = 2
      max_articles_per_feed  = 100
      no_stylesheets         = True
      use_embedded_content   = False
      simultaneous_downloads = 1
      encoding               = 'utf-8'

      # Stylesheet text handed to calibre.  Spelled out as explicit pieces so
      # the value (including its original leading whitespace) does not depend
      # on how this class body happens to be indented.
      extra_css              = (
          '\n'
          '                                 body{font-family: Arial,Helvetica,sans-serif}\n'
          '                             '
      )

      # Metadata passed through to the conversion step; the values reuse the
      # class attributes defined above.
      conversion_options = {
          'comment': description,
          'tags': category,
          'publisher': publisher,
          'language': language,
      }

      remove_tags_before = dict(name='h1')
      feeds = [(u'All articles', u'http://economictimes.indiatimes.com/rssfeedsdefault.cms')]

      def print_version(self, url):
          """Return the ``m.economictimes.com/PDAET`` URL for an article.

          Everything after the last ``/articleshow/`` in ``url`` is appended to
          the PDAET prefix.  If ``url`` does not contain ``/articleshow/`` it is
          returned unchanged instead of being glued onto the prefix, which
          would produce an invalid address.

          An older ``?prtpage=1`` variant used to follow as a second ``return``
          statement; it could never execute and has been removed.
          """
          _head, sep, article_id = url.rpartition('/articleshow/')
          if not sep:
              return url
          return 'http://m.economictimes.com/PDAET/articleshow/' + article_id

      def get_article_url(self, article):
          """Return the article URL from the feed entry's ``guid``.

          Returns ``None`` when the entry has no ``guid`` (previously this
          raised ``AttributeError``) or when the guid points at a
          ``/quickieslist/`` or ``/quickiearticleshow/`` page.
          """
          guid = article.get('guid', None)
          if not guid:
              return None
          if '/quickieslist/' in guid or '/quickiearticleshow/' in guid:
              return None
          return guid

      def preprocess_html(self, soup):
          """Strip inline ``style`` attributes, then apply ``adeify_images``.

          Every element that carries a ``style`` attribute has it deleted in
          place; the soup is then passed to ``self.adeify_images`` and that
          call's result is returned.
          """
          for styled in soup.findAll(style=True):
              del styled['style']
          return self.adeify_images(soup)
  55.