home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / intelligencer.recipe < prev    next >
Text File  |  2011-09-09  |  1KB  |  47 lines

  1. #!/usr/bin/env  python
  2.  
  3. __license__   = 'GPL v3'
  4. __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
  5.  
  6. '''
  7. Inteligencer.ca
  8. '''
  9.  
  10. from calibre.web.feeds.news import BasicNewsRecipe
  11.  
  12. class Inteligencer(BasicNewsRecipe):
  13.     title                 = u'Intelligencer'
  14.     oldest_article        = 7
  15.     max_articles_per_feed = 100
  16.     encoding              = 'utf-8'
  17.     language = 'en'
  18.  
  19.     no_stylesheets        = True
  20.     use_embedded_content  = False
  21.     lang                  = 'en-CA'
  22.     
  23.     conversion_options = {
  24.                           'language'         : lang
  25.                         , 'pretty_print'     : True
  26.                         }
  27.                         
  28.     remove_attributes = ['style','width','height','font','border','align','action','onload']
  29.  
  30.     keep_only_tags     = [dict(name='td',attrs={'colspan':'2'})]
  31.  
  32.     remove_tags = [
  33.                     dict(name=['object','link','embed','iframe'])
  34.                    ,dict(name='div',attrs={'id':'header'})
  35.                   ]
  36.     feeds          = [(u'Recent News', u'http://www.intelligencer.ca/rss/')]
  37.  
  38.     def print_version(self, url):
  39.         return url.replace('/ArticleDisplay.aspx?','/PrintArticle.aspx?')
  40.         
  41.     def preprocess_html(self, soup):
  42.         for item in soup.findAll('td'):
  43.             del item['colspan']
  44.             item.name = 'div'
  45.         return soup
  46.         
  47.