home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / pc_world.recipe < prev    next >
Text File  |  2011-09-09  |  7KB  |  106 lines

  1. #!/usr/bin/env  python
  2. __license__     = 'GPL v3'
  3. __author__      = 'Lorenzo Vigentini'
  4. __copyright__   = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
  5. __version__     = 'v1.01'
  6. __date__        = '14, January 2010'
  7. __description__ = 'PC World and Macworld consistently deliver editorial excellence through award-winning content and trusted product reviews.'
  8.  
  9. '''
  10. http://www.pcworld.com/
  11. '''
  12.  
  13. from calibre.web.feeds.news import BasicNewsRecipe
  14. from calibre.ptempfile import PersistentTemporaryFile
  15.  
# Module-level names. pcWorld.get_obfuscated_article appends to
# `self.temp_files`, while the pcWorld class body below assigns neither
# `temp_files` nor `articles_are_obfuscated` itself.
# NOTE(review): as module globals these are presumably not visible to
# BasicNewsRecipe as recipe settings -- confirm whether they were meant to be
# class attributes of pcWorld.
temp_files = []
articles_are_obfuscated = True
  18.  
  19. class pcWorld(BasicNewsRecipe):
  20.     __author__    = 'Lorenzo Vigentini'
  21.     description   = 'PC World and Macworld consistently deliver editorial excellence through award-winning content and trusted product reviews.'
  22.     cover_url     = 'http://images.pcworld.com/images/common/header/header-logo.gif'
  23.  
  24.     title          = 'PCWorld '
  25.     publisher      = 'IDG Communication'
  26.     category       = 'PC, video, computing, product reviews, editing, cameras, production'
  27.  
  28.     language       = 'en'
  29.     timefmt        = '[%a, %d %b, %Y]'
  30.  
  31.     oldest_article        = 7
  32.     max_articles_per_feed = 20
  33.     use_embedded_content  = False
  34.     recursion             = 10
  35.  
  36.     remove_javascript     = True
  37.     no_stylesheets        = True
  38.  
  39.     def get_obfuscated_article(self, url):
  40.         br = self.get_browser()
  41.         br.open(url+'&print')
  42.  
  43.         response = br.follow_link(url, nr = 0)
  44.         html = response.read()
  45.  
  46.         self.temp_files.append(PersistentTemporaryFile('_fa.html'))
  47.         self.temp_files[-1].write(html)
  48.         self.temp_files[-1].close()
  49.         return self.temp_files[-1].name
  50.  
  51.     keep_only_tags     = [
  52.                             dict(name='div', attrs={'class':'article'})
  53.                         ]
  54.  
  55.     remove_tags        = [
  56.                             dict(name='div', attrs={'class':['toolBar','mac_tags','toolBar  btmTools','recommend longRecommend','recommend shortRecommend','textAds']}),
  57.                             dict(name='div', attrs={'id':['sidebar','comments','mac_tags']}),
  58.                             dict(name='ul', attrs={'class':'tools'}),
  59.                             dict(name='li', attrs={'class':'sub'})
  60.                         ]
  61.  
  62.     feeds          = [
  63.                        (u'PCWorld Headlines', u'http://feeds.pcworld.com/pcworld/latestnews'),
  64.                        (u'How-To', u'http://feeds.pcworld.com/pcworld/update/howto'),
  65.                        (u'Today@PCWorld', u'http://feeds.pcworld.com/pcworld/blogs/todayatpcw'),
  66.                        (u'Reviews', u'http://feeds.pcworld.com/pcworld/update/reviews'),
  67.                        (u'Most Popular Downloads', u'http://feeds.pcworld.com/pcworld/downloads/monthly'),
  68.                        (u'Answer Lines', u'http://feeds.pcworld.com/pcworld/blogs/answer_line'),
  69.                        (u'Digital Focus', u'http://feeds.pcworld.com/pcworld/blogs/digital_focus'),
  70.                        (u'Download this', u'http://feeds.pcworld.com/pcworld/blogs/download_this/'),
  71.                        (u'Game on', u'http://feeds.pcworld.com/pcworld/blogs/game_on'),
  72.                        (u'Geek tech', u'http://feeds.pcworld.com/pcworld/blogs/geektech/'),
  73.                        (u'Hassle free PC', u'http://feeds.pcworld.com/pcworld/blogs/hassle-free_pc'),
  74.                        (u'Mobile computing', u'http://feeds.pcworld.com/pcworld/blogs/mobile_computing'),
  75.                        (u'Security alert', u'http://feeds.pcworld.com/pcworld/blogs/security_alert/'),
  76.                        (u'BizFeed', u'http://feeds.pcworld.com/pcworld/businesscenter/bizfeed/'),
  77.                        (u'The Cost Cutter', u'http://feeds.pcworld.com/pcworld/businesscenter/cost_cutter/'),
  78.                        (u'Linux line', u'http://feeds.pcworld.com/pcworld/businesscenter/linuxline/'),
  79.                        (u'Net Work', u'http://feeds.pcworld.com/pcworld/businesscenter/network/'),
  80.                        (u'Peer-to-Peer', u'http://feeds.pcworld.com/pcworld/businesscenter/peertopeer/'),
  81.                        (u'Tech inciter', u'http://feeds.pcworld.com/pcworld/businesscenter/tech_inciter/'),
  82.                        (u'Gadgets and gear', u'http://feeds.pcworld.com/pcworld/update/gadgets'),
  83.                        (u'Home Entertainment', u'http://feeds.pcworld.com/pcworld/update/home-entertainment'),
  84.                        (u'Mobile Devices', u'http://feeds.pcworld.com/pcworld/update/mobile-devices')
  85.                      ]
  86.  
  87.     extra_css = '''
  88.                 h1 {color:#FF0000;font-family:Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:18px;}
  89.                 h2 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:16px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:16px; }
  90.                 h3 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px;}
  91.                 h4 {color:#333333; font-family:Arial,Helvetica,sans-serif;font-size:13px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; }
  92.                 h5 {color:#333333; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; text-transform:uppercase;}
  93.                 .newsdate {color:#333333;font-family:Arial,Helvetica,sans-serif;font-size:10px; font-size-adjust:none; font-stretch:normal; font-style:italic; font-variant:normal; font-weight:bold; line-height:10px; text-decoration:none;}
  94.                 .articleInfo {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif;font-size:10px; font-size-adjust:none; font-stretch:normal; font-style:bold; font-variant:normal; font-weight:bold; line-height:10px; text-decoration:none;}
  95.                 img {align:left;}
  96.                 #breadcrumb {}
  97.                 #breadcrumb ul {padding:0;margin:2px 0 0 0;}
  98.                 #breadcrumb li {list-style:none;display:inline;padding:0;}
  99.                 #breadcrumb li big {padding-right:2px;}
  100.                 #articleHead {border-top:1px solid #CCC;padding-top:5px;clear:both;margin-bottom:10px;}
  101.                 #articleHead h1 {font-size:25px;line-height:28px;margin:10px 0px 2px;padding:0px;}
  102.                 #articleHead h2 {font-size:14px;line-height:16px;margin:0px 0px 6px;padding:0px;}
  103.                 #articleHead p {font-size:15px;font-weight:bold;margin:0px;padding:0px;}
  104.                 #articleHead .date {color:#999;margin:0px 0px 20px;padding:0px;}
  105.                 '''
  106.