home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / dna.recipe < prev    next >
Text File  |  2011-09-09  |  2KB  |  49 lines

  1. '''
  2. dnaindia.com
  3. '''
  4. import re
  5. from calibre.web.feeds.news import BasicNewsRecipe
  6.  
  7. class DNAIndia(BasicNewsRecipe):
  8.  
  9.     title       = 'DNA India'
  10.     description = 'Mumbai news, India news, World news, breaking news'
  11.     __author__  = 'Kovid Goyal'
  12.     language = 'en_IN'
  13.  
  14.     encoding    = 'cp1252'
  15.  
  16.     feeds       = [
  17.                    ('Top News', 'http://www.dnaindia.com/syndication/rss_topnews.xml'),
  18.                    ('Popular News', 'http://www.dnaindia.com/syndication/rss_popular.xml'),
  19.                    ('Recent Columns', 'http://www.dnaindia.com/syndication/rss_column.xml'),
  20.                    ('Mumbai', 'http://www.dnaindia.com/syndication/rss,catid-1.xml'),
  21.                    ('India', 'http://www.dnaindia.com/syndication/rss,catid-2.xml'),
  22.                    ('World', 'http://www.dnaindia.com/syndication/rss,catid-9.xml'),
  23.                    ('Money', 'http://www.dnaindia.com/syndication/rss,catid-4.xml'),
  24.                    ('Sports', 'http://www.dnaindia.com/syndication/rss,catid-6.xml'),
  25.                    ('After Hours', 'http://www.dnaindia.com/syndication/rss,catid-7.xml'),
  26.                    ('Digital Life', 'http://www.dnaindia.com/syndication/rss,catid-1089741.xml'),
  27.                    ]
  28.     remove_tags = [{'id':['footer', 'lhs-col']}, {'class':['bottom', 'categoryHead',
  29.         'article_tools']}]
  30.     keep_only_tags = dict(id='middle-col')
  31.     remove_tags_after=[dict(attrs={'id':'story'})]
  32.     remove_attributes=['style']
  33.     no_stylesheets = True
  34.  
  35.     def print_version(self, url):
  36.         match = re.search(r'newsid=(\d+)', url)
  37.         if not match:
  38.             return url
  39.         return 'http://www.dnaindia.com/dnaprint.asp?newsid='+match.group(1)
  40.  
  41.     def postprocess_html(self, soup, first_fetch):
  42.         for t in soup.findAll(['table', 'tr', 'td']):
  43.             t.name = 'div'
  44.  
  45.         a = soup.find(href='http://www.3dsyndication.com/')
  46.         if a is not None:
  47.             a.parent.extract()
  48.         return soup
  49.