home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / volksrant.recipe < prev    next >
Text File  |  2011-09-09  |  3KB  |  87 lines

  1. # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
  2. from __future__ import with_statement
  3.  
  4. __license__   = 'GPL v3'
  5. __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
  6. __docformat__ = 'restructuredtext en'
  7.  
  8. '''
  9.  Modified by Tony Stegall
  10.  on 10/10/10 to include function to grab print version of articles
  11. '''
  12.  
  13. from datetime import date
  14. from calibre.web.feeds.news import BasicNewsRecipe
  15. '''
  16. added by Tony Stegall
  17. '''
  18. #######################################################
  19. from calibre.ptempfile import PersistentTemporaryFile
  20. #######################################################
  21.  
  22. class AdvancedUserRecipe1249039563(BasicNewsRecipe):
  23.     title          = u'De Volkskrant'
  24.     __author__     = 'acidzebra'
  25.     oldest_article = 7
  26.     max_articles_per_feed = 100
  27.     no_stylesheets = True
  28.     language = 'nl'
  29.  
  30.     extra_css      = '''
  31.                         body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
  32.                         h1{font-size:large;}
  33.                      '''
  34.     '''
  35.       Change Log:
  36.         Date:       10/10/10  - Modified code to include obfuscated to get the print version
  37.         Author:   Tony Stegall
  38.  
  39.         Date:       01/01/11  - Modified for better results around December/January.
  40.         Author:   Martin Tarenskeen
  41.     '''
  42.    #######################################################################################################
  43.     temp_files = []
  44.     articles_are_obfuscated = True
  45.  
  46.     def get_obfuscated_article(self, url):
  47.         br = self.get_browser()
  48.         print 'THE CURRENT URL IS: ', url
  49.         br.open(url)
  50.         year = date.today().year
  51.  
  52.         try:
  53.             response = br.follow_link(url_regex='.*?(%d)(\\/)(article)(\\/)(print)(\\/)'%year, nr = 0)
  54.             html = response.read()
  55.         except:
  56.             year = year-1
  57.             try:
  58.                 response = br.follow_link(url_regex='.*?(%d)(\\/)(article)(\\/)(print)(\\/)'%year, nr = 0)
  59.                 html = response.read()
  60.             except:
  61.                 response = br.open(url)
  62.                 html = response.read()
  63.  
  64.  
  65.         self.temp_files.append(PersistentTemporaryFile('_fa.html'))
  66.         self.temp_files[-1].write(html)
  67.         self.temp_files[-1].close()
  68.         return self.temp_files[-1].name
  69.  
  70.    ###############################################################################################################
  71.  
  72.     '''
  73.       Change Log:
  74.        Date: 10/15/2010
  75.        Feeds updated by Martin Tarenskeen
  76.     '''
  77.  
  78.     feeds          = [
  79.                       (u'Laatste Nieuws', u'http://www.volkskrant.nl/rss/laatstenieuws.rss'),
  80.                       (u'Binnenland', u'http://www.volkskrant.nl/rss/nederland.rss'),
  81.                       (u'Buitenland', u'http://www.volkskrant.nl/rss/internationaal.rss'),
  82.                       (u'Economie', u'http://www.volkskrant.nl/rss/economie.rss'),
  83.                       (u'Sport', u'http://www.volkskrant.nl/rss/sport.rss'),
  84.                       (u'Cultuur', u'http://www.volkskrant.nl/rss/kunst.rss'),
  85.                       (u'Gezondheid & Wetenschap', u'http://www.volkskrant.nl/rss/wetenschap.rss'),
  86.                       (u'Internet & Media', u'http://www.volkskrant.nl/rss/media.rss') ]
  87.