home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / tech_world.recipe < prev    next >
Text File  |  2011-09-09  |  6KB  |  93 lines

  1. #!/usr/bin/env  python
  2. __license__     = 'GPL v3'
  3. __author__      = 'Lorenzo Vigentini'
  4. __copyright__   = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
  5. __version__     = 'v1.01'
  6. __date__        = '14, January 2010'
  7. __description__ = 'Techworld offers the latest breaking IT industry news, product reviews, enterprise software downloads, how-to articles and expert blogs for technical professionals and enterprise users in the UK'
  8.  
  9. '''
  10. http://www.techworld.com/
  11. '''
  12.  
  13. from calibre.web.feeds.news import BasicNewsRecipe
  14. from calibre.ptempfile import PersistentTemporaryFile
  15.  
  16. class techworld(BasicNewsRecipe):
  17.     __author__     = 'Lorenzo Vigentini'
  18.     description   = 'Techworld offers the latest breaking IT industry news, product reviews, enterprise software downloads, how-to articles and expert blogs for technical professionals and enterprise users in the UK'
  19.     cover_url     = 'http://www.techworld.com/graphics/header/site_logo.jpg'
  20.  
  21.     title          = 'TechWorld'
  22.     publisher      = 'IDG Communication'
  23.     category       = 'Apple, Mac, video, computing, product reviews, editing, cameras, production'
  24.  
  25.     language       = 'en'
  26.     timefmt        = '[%a, %d %b, %Y]'
  27.  
  28.     oldest_article        = 7
  29.     max_articles_per_feed = 15
  30.     use_embedded_content  = False
  31.     recursion             = 10
  32.  
  33.     remove_javascript     = True
  34.     no_stylesheets        = True
  35.  
  36.     temp_files = []
  37.     articles_are_obfuscated = True
  38.  
  39.     def get_obfuscated_article(self, url):
  40.         br = self.get_browser()
  41.         br.open(url)
  42.         response = br.follow_link(url_regex='?getDynamicPage&print$', nr = 0)
  43.         html = response.read()
  44.         self.temp_files.append(PersistentTemporaryFile('_fa.html'))
  45.         self.temp_files[-1].write(html)
  46.         self.temp_files[-1].close()
  47.         return self.temp_files[-1].name
  48.  
  49.     keep_only_tags     = [
  50.                             dict(name='div', attrs={'id':'articleBody'}),
  51.                             dict(name='h2', attrs={'class':'blogTitle'}),
  52.                             dict(name='h3', attrs={'class':'blogger'}),
  53.                         ]
  54.  
  55.     remove_tags        = [
  56.                             dict(name='div', attrs={'class':['submissionBar','mpuContainer']}),
  57.                             dict(name='div', attrs={'id':['breadcrumb','mainContentSidebar','articleIconsList','loginSubscribeBoxout']}),
  58.                             dict(name='ul', attrs={'class':'articleIconsList'})
  59.                         ]
  60.     remove_tags_after   = [
  61.                             dict(name='div', attrs={'id':'articleFooter'})
  62.                         ]
  63.  
  64.     feeds          = [
  65.                        (u'News', u'http://www.techworld.com/rss/feeds/techworld-news.xml'),
  66.                        (u'How-Tos', u'http://www.techworld.com/rss/feeds/techworld-how-tos.xml'),
  67.                        (u'Reviews', u'http://www.techworld.com/rss/feeds/techworld-reviews.xml'),
  68.                        (u'Features', u'http://www.techworld.com/rss/feeds/techworld-features.xml'),
  69.                        (u'Storage', u'http://www.techworld.com/rss/feeds/techworld-storage.xml'),
  70.                        (u'Applications', u'http://www.techworld.com/rss/feeds/techworld-applications.xml'),
  71.                        (u'Virtualization', u'http://www.techworld.com/rss/feeds/techworld-virtualisation.xml'),
  72.                        (u'Personal Tech', u'http://www.techworld.com/rss/feeds/techworld-personal-tech.xml'),
  73.                        (u'Green IT', u'http://www.techworld.com/rss/feeds/techworld-green-it.xml'),
  74.                        (u'Security', u'http://www.techworld.com/rss/feeds/techworld-security.xml'),
  75.                        (u'Operating Systems', u'http://www.techworld.com/rss/feeds/techworld-operating-systems.xml'),
  76.                        (u'Networking', u'http://www.techworld.com/rss/feeds/techworld-networking.xml'),
  77.                        (u'Mobile and Wireless', u'http://www.techworld.com/rss/feeds/techworld-mobile-wireless.xml'),
  78.                        (u'Data Centre', u'http://www.techworld.com/rss/feeds/techworld-data-centre.xml'),
  79.                        (u'SME', u'http://www.techworld.com/rss/feeds/techworld-sme.xml'),
  80.                        (u'TechWorld Blogs', u'http://blogs.techworld.com/atom.xml')
  81.                     ]
  82.  
  83.     extra_css = '''
  84.                 h1 {color:#0066CC;font-family:Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:20px;}
  85.                 h2 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:16px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:16px; }
  86.                 h3 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:15px;}
  87.                 h4 {color:#333333; font-family:Arial,Helvetica,sans-serif;font-size:13px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:13px; }
  88.                 h5 {color:#333333; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:11px; text-transform:uppercase;}
  89.                 .newsdate {color:#333333;font-family:Arial,Helvetica,sans-serif;font-size:10px; font-size-adjust:none; font-stretch:normal; font-style:italic; font-variant:normal; font-weight:bold; line-height:10px; text-decoration:none;}
  90.                 .articleInfo {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif;font-size:10px; font-size-adjust:none; font-stretch:normal; font-style:bold; font-variant:normal; font-weight:bold; line-height:10px; text-decoration:none;}
  91.                 img {align:left;}
  92.                 '''
  93.