home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / smashing.recipe < prev    next >
Text File  |  2011-09-09  |  2KB  |  51 lines

  1. #!/usr/bin/env  python
  2.  
  3. __license__   = 'GPL v3'
  4. __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
  5. '''
  6. www.smashingmagazine.com
  7. '''
  8.  
  9. from calibre.web.feeds.news import BasicNewsRecipe
  10.  
  11. class SmashingMagazine(BasicNewsRecipe):
  12.     title                 = 'Smashing Magazine'
  13.     __author__            = 'Darko Miletic'
  14.     description           = 'We smash you with the information that will make your life easier, really'
  15.     oldest_article        = 20
  16.     language              = 'en'
  17.     max_articles_per_feed = 100
  18.     no_stylesheets        = True
  19.     use_embedded_content  = False
  20.     publisher             = 'Smashing Magazine'
  21.     category              = 'news, web, IT, css, javascript, html'
  22.     encoding              = 'utf-8'
  23.  
  24.     conversion_options = {
  25.                              'comments'    : description
  26.                             ,'tags'        : category
  27.                             ,'publisher'   : publisher
  28.                          }
  29.  
  30.     keep_only_tags = [dict(name='div', attrs={'id':'leftcolumn'})]
  31.     remove_tags_after = dict(name='ul',attrs={'class':'social'})
  32.     remove_tags = [
  33.                     dict(name=['link','object'])
  34.                    ,dict(name='h1',attrs={'class':'logo'})
  35.                    ,dict(name='div',attrs={'id':'booklogosec'})
  36.                    ,dict(attrs={'src':'http://media2.smashingmagazine.com/wp-content/uploads/images/the-smashing-book/smbook6.gif'})
  37.                   ]
  38.  
  39.     feeds          = [(u'Articles', u'http://rss1.smashingmagazine.com/feed/')]
  40.  
  41.     def preprocess_html(self, soup):
  42.         for iter in soup.findAll('div',attrs={'class':'leftframe'}):
  43.             it = iter.find('h1')
  44.             if it == None:
  45.                iter.extract()
  46.         for item in soup.findAll('img'):
  47.             oldParent = item.parent
  48.             if oldParent.name == 'a':
  49.                oldParent.name = 'div'
  50.         return soup
  51.