home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / le_temps.recipe < prev    next >
Text File  |  2011-09-09  |  7KB  |  106 lines

  1. #!/usr/bin/env python
  2. # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
  3. from __future__ import with_statement
  4.  
  5. __license__   = 'GPL v3'
  6. __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
  7. __docformat__ = 'restructuredtext en'
  8.  
  9.  
  10.  
  11. from calibre.web.feeds.news import BasicNewsRecipe
  12.  
  13. class LeTemps(BasicNewsRecipe):
  14.      title          = u'Le Temps'
  15.      oldest_article = 7
  16.      max_articles_per_feed = 100
  17.      __author__ = 'Kovid Goyal'
  18.      description = 'French news. Needs a subscription from http://www.letemps.ch'
  19.      no_stylesheets = True
  20.      remove_javascript = True
  21.      recursions = 1
  22.      encoding = 'UTF-8'
  23.      match_regexps = [r'http://www.letemps.ch/Page/Uuid/[-0-9a-f]+\|[1-9]']
  24.      language = 'fr'
  25.      needs_subscription = True
  26.  
  27.      def get_browser(self):
  28.          br = BasicNewsRecipe.get_browser(self)
  29.          br.open('http://www.letemps.ch/login')
  30.          br.select_form(nr=1)
  31.          br['username'] = self.username
  32.          br['password'] = self.password
  33.          raw = br.submit().read()
  34.          if '>Login' in raw:
  35.              raise ValueError('Failed to login to letemp.ch. Check '
  36.                      'your username and password')
  37.          return br
  38.  
  39.  
  40.      keep_only_tags = [dict(name='div', attrs={'id':'content'}),
  41.                         dict(name='div', attrs={'class':'story'})
  42.                       ]
  43.      remove_tags    = [dict(name='div', attrs={'id':['footer','sub']}),
  44.                        dict(name='div', attrs={'class':['box additional','box function','right','box links','follow']})]
  45.  
  46.  
  47.      extra_css      = '''h1{font-family:"Georgia","Times New Roman",Times,serif;font-size:large;}
  48.                          .headline{font-family:"Georgia","Times New Roman",Times,serif;font-size:large;color:#990000;}
  49.                          .summary_gal{color:#777777;font-family:"Georgia","Times New Roman",Times,serif;font-size:x-small;}
  50.                          #capt{color:#1B1B1B;font-family:"Georgia","Times New Roman",Times,serif;font-size:x-small;}
  51.                          #content{font-family:"Lucida Grande","Lucida Sans Unicode",Arial,Verdana,sans-serif;}
  52.                          h2 {font-size: 24px; line-height: 25px; margin-bottom: 14px; text-transform:uppercase;}
  53.                          .author {font-size:x-small; margin: 0 0 5px 0; color:#797971; font-family:"Lucida Grande","Lucida Sans Unicode",Arial,Verdana,sans-serif;}
  54.                          .lead {font-family:"Lucida Grande","Lucida Sans Unicode",Arial,Verdana,sans-serif;font-weight: bold; margin: 10px 0;font-size:small;}
  55.                          p {margin: 0 0 10px 0;}
  56.                          h3{font-size:small;font-weight:bold;}
  57.                          .heading{color:#940026;font-size:x-small;}
  58.                          .description{font-size:x-small;font-family:"Lucida Grande","Lucida Sans Unicode",Arial,Verdana,sans-serif;color:#797971; }
  59.                          a {color:#1B1B1B; font-size:small;}
  60.                          .linkbox{font-size:x-small;color:#1B1B1B;font-family:"Lucida Grande","Lucida Sans Unicode",Arial,Verdana,sans-serif;} '''
  61.  
  62.      feeds          = [
  63.                               (u'Actualit\xe9', 'http://www.letemps.ch/rss/site/'),
  64.                               ('Monde', 'http://www.letemps.ch/rss/site/actualite/monde'),
  65.                               (u'Suisse & R\xe9gions', 'http://www.letemps.ch/rss/site/actualite/suisse_regions'),
  66.                               ('Sciences & Environnement', 'http://www.letemps.ch/rss/site/actualite/sciences_environnement'),
  67.                               (u'Soci\xe9t\xe9', 'http://www.letemps.ch/rss/site/actualite/societe'),
  68.                               ('Economie & Finance', 'http://www.letemps.ch/rss/site/economie_finance'),
  69.                               ('Economie & Finance - Finance', 'http://www.letemps.ch/rss/site/economie_finance/finance'),
  70.                               ('Economie & Finance - Fonds de placement', 'http://www.letemps.ch/rss/site/economie_finance/fonds_placement'),
  71.                               (u'Economie & Finance - Carri\xe9res', 'http://www.letemps.ch/rss/site/economie_finance/carrieres'),
  72.                               ('Culture', 'http://www.letemps.ch/rss/site/culture'),
  73.                               (u'Culture - Cin\xe9ma', 'http://www.letemps.ch/rss/site/culture/cinema'),
  74.                               ('Culture - Musiques', 'http://www.letemps.ch/rss/site/culture/musiques'),
  75.                               (u'Culture - Sc\xe9nes', 'http://www.letemps.ch/rss/site/culture/scenes'),
  76.                               ('Culture - Arts plastiques', 'http://www.letemps.ch/rss/site/culture/arts_plastiques'),
  77.                               ('Livres', 'http://www.letemps.ch/rss/site/culture/livres'),
  78.                               ('Opinions', 'http://www.letemps.ch/rss/site/opinions'),
  79.                               ('Opinions - Editoriaux', 'http://www.letemps.ch/rss/site/opinions/editoriaux'),
  80.                               (u'Opinions - Invit\xe9s', 'http://www.letemps.ch/rss/site/opinions/invites'),
  81.                               ('Opinions - Chroniques', 'http://www.letemps.ch/rss/site/opinions/chroniques'),
  82.                               ('LifeStyle', 'http://www.letemps.ch/rss/site/lifestyle'),
  83.                               ('LifeStyle - Luxe', 'http://www.letemps.ch/rss/site/lifestyle/luxe'),
  84.                               ('LifeStyle - Horlogerie & Joaillerie', 'http://www.letemps.ch/rss/site/lifestyle/horlogerie_joaillerie'),
  85.                               ('LifeStyle - Design', 'http://www.letemps.ch/rss/site/lifestyle/design'),
  86.                               ('LifeStyle - Voyages', 'http://www.letemps.ch/rss/site/lifestyle/voyages'),
  87.                               ('LifeStyle - Gastronomie', 'http://www.letemps.ch/rss/site/lifestyle/gastronomie'),
  88.                               ('LifeStyle - Architecture & Immobilier', 'http://www.letemps.ch/rss/site/lifestyle/architecture_immobilier'),
  89.                               ('LifeStyle - Automobile', 'http://www.letemps.ch/rss/site/lifestyle/automobile'),
  90.                               ('Sports', 'http://www.letemps.ch/rss/site/actualite/sports'),
  91.                              ]
  92.  
  93.      def postprocess_html(self, soup, first):
  94.         for tag in soup.findAll('div', attrs = {'class':'box pagination'}):
  95.             tag.extract()
  96.         if not first:
  97.             h = soup.find('h1')
  98.             if h is not None:
  99.                 h.extract()
  100.         return soup
  101.  
  102.    #  def print_version(self, url):
  103.    #         return url.replace('Page', 'Facet/print')
  104.  
  105.  
  106.