home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / cyberpresse.recipe < prev    next >
Text File  |  2011-09-09  |  3KB  |  57 lines

  1. from calibre.web.feeds.news import BasicNewsRecipe
  2.  
  3. class Cyberpresse(BasicNewsRecipe):
  4.  
  5.     title          = u'Cyberpresse'
  6.     __author__     = 'balok and Sujata Raman'
  7.     description    = 'Canadian news in French'
  8.     language = 'fr'
  9.  
  10.     oldest_article = 7
  11.     max_articles_per_feed = 100
  12.     no_stylesheets = True
  13.     remove_javascript     = True
  14.     html2lrf_options = ['--left-margin=0','--right-margin=0','--top-margin=0','--bottom-margin=0']
  15.     encoding = 'utf-8'
  16.  
  17.  
  18.     keep_only_tags = [dict(name='div', attrs={'class':'article-page'}),
  19.                       dict(name='div', attrs={'id':'articlePage'}),
  20.                       ]
  21.  
  22.     extra_css = '''
  23.                     .photodata{font-family:Arial,Helvetica,Verdana,sans-serif;color: #999999; font-size: 90%; }
  24.                     h1{font-family:Georgia,Times,serif ; font-size: large; }
  25.                     .amorce{font-family:Arial,Helvetica,Verdana,sans-serif; font-weight:bold;}
  26.                     .article-page{font-family:Arial,Helvetica,Verdana,sans-serif; font-size: x-small;}
  27.                     #articlePage{font-family:Arial,Helvetica,Verdana,sans-serif; font-size: x-small;}
  28.                     .auteur{font-family:Georgia,Times,sans-serif; font-size: 90%; color:#006699 ;}
  29.                     .bodyText{font-family:Arial,Helvetica,Verdana,sans-serif; font-size: x-small;}
  30.                     .byLine{font-family:Arial,Helvetica,Verdana,sans-serif; font-size: 90%;}
  31.                     .entry{font-family:Arial,Helvetica,Verdana,sans-serif; font-size: x-small;}
  32.                     .minithumb-auteurs{font-family:Arial,Helvetica,Verdana,sans-serif; font-size: 90%; }
  33.                     a{color:#003399; font-weight:bold; }
  34.                 '''
  35.  
  36.     remove_tags = [
  37.                         dict(name='div', attrs={'class':['centerbar','colspan','share-module']}),
  38.                         dict(name='p', attrs={'class':['zoom']}),
  39.                         dict(name='ul', attrs={'class':['stories']}),
  40.                         dict(name='h4', attrs={'class':['general-cat']}),
  41.                     ]
  42.  
  43.     feeds          = [(u'Manchettes', u'http://www.cyberpresse.ca/rss/225.xml'),
  44.                       (u'Capitale nationale', u'http://www.cyberpresse.ca/rss/501.xml'),
  45.                       (u'Opinions', u'http://www.cyberpresse.ca/rss/977.xml'),
  46.                       (u'Insolite', u'http://www.cyberpresse.ca/rss/279.xml')
  47.                       ]
  48.  
  49.     def postprocess_html(self, soup, first):
  50.  
  51.          for tag in soup.findAll(name=['i','strong']):
  52.              tag.name = 'div'
  53.  
  54.          return soup
  55.  
  56.  
  57.