home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / maximum_pc.recipe < prev    next >
Text File  |  2011-09-09  |  2KB  |  44 lines

  1. from calibre.ptempfile import PersistentTemporaryFile
  2. from calibre.web.feeds.news import BasicNewsRecipe
  3.  
  4. class AdvancedUserRecipe1276930924(BasicNewsRecipe):
  5.     title          = u'Maximum PC'
  6.     __author__           = 'rty'
  7.     description           = 'Maximum PC'
  8.     publisher             = 'http://www.maximumpc.com'
  9.     category              = 'news, computer, technology'
  10.     language = 'en'
  11.     oldest_article = 30
  12.     max_articles_per_feed = 100
  13.     remove_javascript = True
  14.     use_embedded_content   = False
  15.     no_stylesheets = True
  16.     language = 'en'
  17.     temp_files = []
  18.     articles_are_obfuscated = True
  19.     feeds          = [(u'News', u'http://www.maximumpc.com/articles/4/feed'),
  20.                           (u'Reviews', u'http://www.maximumpc.com/articles/40/feed'),
  21.                           (u'Editors Blog', u'http://www.maximumpc.com/articles/6/feed'),
  22.                           (u'How-to', u'http://www.maximumpc.com/articles/32/feed'),
  23.                           (u'Features', u'http://www.maximumpc.com/articles/31/feed'),
  24.                            (u'From the Magazine', u'http://www.maximumpc.com/articles/72/feed')
  25.                          ]
  26.     keep_only_tags = [
  27.                                dict(name='div', attrs={'class':['print-title','article_body']}),
  28.                               ]
  29.     remove_tags = [
  30.                     dict(name='div', attrs={'class':'comments-tags-actions'}),
  31.                           ]
  32.     remove_tags_before  = dict(name='div', attrs={'class':'print-title'})
  33.     remove_tags_after     = dict(name='div', attrs={'class':'meta-content'})
  34.  
  35.     def get_obfuscated_article(self, url):
  36.         br = self.get_browser()
  37.         br.open(url)
  38.         response = br.follow_link(url_regex = r'/print/[0-9]+', nr = 0)
  39.         html = response.read()
  40.         self.temp_files.append(PersistentTemporaryFile('_fa.html'))
  41.         self.temp_files[-1].write(html)
  42.         self.temp_files[-1].close()
  43.         return self.temp_files[-1].name
  44.