home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / zdnet.recipe < prev    next >
Text File  |  2011-09-09  |  2KB  |  69 lines

  1. __license__   = 'GPL v3'
  2. __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
  3.  
  4. '''
  5. Fetch zdnet.
  6. '''
  7.  
  8. from calibre.web.feeds.news import BasicNewsRecipe
  9.  
  10.  
  11. class cdnet(BasicNewsRecipe):
  12.  
  13.     title = 'zdnet'
  14.     description = 'zdnet security'
  15.     __author__ = 'Oliver Niesner'
  16.     language = 'en'
  17.  
  18.     use_embedded_content   = False
  19.     timefmt = ' [%d %b %Y]'
  20.     max_articles_per_feed = 40
  21.     no_stylesheets = True
  22.     encoding = 'latin1'
  23.  
  24.  
  25.  
  26.     remove_tags = [dict(id='eyebrows'),
  27.            dict(id='header'),
  28.            dict(id='search'),
  29.            dict(id='nav'),
  30.            dict(id='blog-author-info'),
  31.            dict(id='post-tags'),
  32.            dict(id='bio-naraine'),
  33.            dict(id='bio-kennedy'),
  34.            dict(id='author-short-disclosure-kennedy'),
  35.            dict(id=''),
  36.            dict(name='div', attrs={'class':'banner'}),
  37.            dict(name='div', attrs={'class':'int'}),
  38.            dict(name='div', attrs={'class':'talkback clear space-2'}),
  39.            dict(name='div', attrs={'class':'content-1 clear'}),
  40.            dict(name='div', attrs={'class':'space-2'}),
  41.            dict(name='div', attrs={'class':'space-3'}),
  42.            dict(name='div', attrs={'class':'thumb-2 left'}),
  43.            dict(name='div', attrs={'class':'hotspot'}),
  44.            dict(name='div', attrs={'class':'hed hed-1 space-1'}),
  45.            dict(name='div', attrs={'class':'view-1 clear content-3 space-2'}),
  46.            dict(name='div', attrs={'class':'hed hed-1 space-1'}),
  47.            dict(name='div', attrs={'class':'hed hed-1'}),
  48.            dict(name='div', attrs={'class':'post-header'}),
  49.            dict(name='div', attrs={'class':'lvl-nav clear'}),
  50.            dict(name='div', attrs={'class':'t-share-overlay overlay-pop contain-overlay-4'}),
  51.            dict(name='p', attrs={'class':'tags'}),
  52.            dict(name='span', attrs={'class':'follow'}),
  53.            dict(name='span', attrs={'class':'int'}),
  54.            dict(name='h4', attrs={'class':'h s-4'}),
  55.            dict(name='a', attrs={'href':'http://www.twitter.com/ryanaraine'}),
  56.            dict(name='div', attrs={'class':'special1'})]
  57.     remove_tags_after = [dict(name='div', attrs={'class':'clear'})]
  58.  
  59.     feeds =  [ ('zdnet', 'http://feeds.feedburner.com/zdnet/security') ]
  60.  
  61.  
  62.     def preprocess_html(self, soup):
  63.         for item in soup.findAll(style=True):
  64.             del item['style']
  65.         return soup
  66.  
  67.  
  68.  
  69.