home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / tomshardware.recipe < prev    next >
Text File  |  2011-09-09  |  3KB  |  80 lines

  1. #!/usr/bin/env  python
  2.  
  3. __license__   = 'GPL v3'
  4. __copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
  5. '''
  6. tomshardware.com/us
  7. '''
  8.  
  9. import urllib
  10. from calibre.web.feeds.recipes import BasicNewsRecipe
  11.  
  12. class Tomshardware(BasicNewsRecipe):
  13.     title               = "Tom's Hardware US"
  14.     __author__          = 'Darko Miletic'
  15.     description         = 'Hardware reviews and News'
  16.     publisher           = "Tom's Hardware"
  17.     category            = 'news, IT, hardware, USA'
  18.     no_stylesheets      = True
  19.     needs_subscription  = True
  20.     language = 'en'
  21.  
  22.     INDEX               = 'http://www.tomshardware.com'
  23.     LOGIN               = INDEX + '/membres/'
  24.     remove_javascript   = True
  25.     use_embedded_content= False
  26.  
  27.     html2lrf_options = [
  28.                           '--comment', description
  29.                         , '--category', category
  30.                         , '--publisher', publisher
  31.                         ]
  32.  
  33.     html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
  34.  
  35.     def get_browser(self):
  36.         br = BasicNewsRecipe.get_browser()
  37.         br.open(self.INDEX+'/us/')
  38.         if self.username is not None and self.password is not None:
  39.             data = urllib.urlencode({ 'action':'login_action'
  40.                                      ,'r':self.INDEX+'/us/'
  41.                                      ,'login':self.username
  42.                                      ,'mdp':self.password
  43.                                    })
  44.             br.open(self.LOGIN,data)
  45.         return br
  46.  
  47.     remove_tags = [
  48.                      dict(name='div' , attrs={'id':'header' })
  49.                     ,dict(name='object')
  50.                   ]
  51.  
  52.     feeds = [
  53.               (u'Latest Articles', u'http://www.tomshardware.com/feeds/atom/tom-s-hardware-us,18-2.xml'          )
  54.              ,(u'Latest News'    , u'http://www.tomshardware.com/feeds/atom/tom-s-hardware-us,18-1.xml')
  55.             ]
  56.  
  57.     def print_version(self, url):
  58.         main, sep, rest = url.rpartition('.html')
  59.         rmain, rsep, article_id = main.rpartition(',')
  60.         tmain, tsep, trest = rmain.rpartition('/reviews/')
  61.         rind = 'http://www.tomshardware.com/news_print.php?p1='
  62.         if tsep:
  63.            rind = 'http://www.tomshardware.com/review_print.php?p1='
  64.         return rind + article_id
  65.  
  66.     def cleanup_image_tags(self,soup):
  67.         for item in soup.findAll('img'):
  68.             for attrib in ['height','width','border','align']:
  69.                 if item.has_key(attrib):
  70.                    del item[attrib]
  71.         return soup
  72.  
  73.     def preprocess_html(self, soup):
  74.         del(soup.body['onload'])
  75.         for item in soup.findAll(style=True):
  76.             del item['style']
  77.         for it in soup.findAll('span'):
  78.             it.name="div"
  79.         return self.cleanup_image_tags(soup)
  80.