home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / hindu_business_line.recipe < prev    next >
Text File  |  2011-09-09  |  2KB  |  54 lines

  1. from __future__ import with_statement
  2. __license__ = 'GPL 3'
  3. __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
  4.  
  5. import re
  6. from calibre.web.feeds.news import BasicNewsRecipe
  7.  
  8. class TheHindu(BasicNewsRecipe):
  9.     title                 = u'The Business Line'
  10.     language = 'en_IN'
  11.  
  12.     oldest_article        = 7
  13.     __author__            = 'Dhiru'
  14.     max_articles_per_feed = 100
  15.     no_stylesheets = True
  16.  
  17.     remove_tags_before = {'name':'font', 'class':'storyhead'}
  18.     preprocess_regexps = [
  19.                 (re.compile(r'<!-- story ends -->.*', re.DOTALL),
  20.                  lambda match: '</body></html>'),
  21.                           ]
  22.     extra_css = '''
  23.                 .storyhead{font-family:Arial,Helvetica,sans-serif; font-size:large; color:#000099;}
  24.                 body{font-family:Verdana,Arial,Helvetica,sans-serif; font-size:x-small; text-align:left;}
  25.                 '''
  26.     feeds          = [
  27.      (u'Main - Latest News', u'http://www.thehindubusinessline.com/rss/blnus.xml'),
  28.        (u'Main - Front Page', u'http://www.thehindubusinessline.com/rss/14hdline.xml'),
  29.       (u'Main - Corporate', u'http://www.thehindubusinessline.com/rss/02hdline.xml'),
  30.       (u'Main - Market', u'http://www.thehindubusinessline.com/rss/05hdline.xml'),
  31.       (u'Main - Opinion', u'http://www.thehindubusinessline.com/rss/04hdline.xml'),
  32.       (u'Main - Infotech', u'http://www.thehindubusinessline.com/rss/15hdline.xml'),
  33.       (u'Main - Marketing', u'http://www.thehindubusinessline.com/rss/19hdline.xml'),
  34.       (u'Main - Money & banking',
  35.        u'http://www.thehindubusinessline.com/rss/06hdline.xml'),
  36.       (u'Main - Agri & Commodities', u'http://www.thehindubusinessline.com/rss/07hdline.xml'),
  37.       (u'Industry',
  38.        u'http://www.thehindubusinessline.com/rss/03hdline.xml'),
  39.       (u'Logistic',
  40.        u'http://www.thehindubusinessline.com/rss/09hdline.xml'),
  41.       (u'Result', u'http://www.thehindubusinessline.com/rss/26hdline.xml'),
  42.       (u'Government',
  43.        u'http://www.thehindubusinessline.com/rss/27hdline.xml'),
  44.       (u'Investment World',
  45.        u'http://www.thehindubusinessline.com/rss/iw20hdline.xml'),
  46.       (u'Supplement - Life',
  47.        u'http://www.thehindubusinessline.com/rss/lf10hdline.xml')
  48.       ]
  49.  
  50.     def postprocess_html(self, soup, first_fetch):
  51.         for t in soup.findAll(['table', 'tr', 'td','center']):
  52.             t.name = 'div'
  53.         return soup
  54.