home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / globe_and_mail.recipe < prev    next >
Text File  |  2011-09-09  |  2KB  |  58 lines

  1. #!/usr/bin/env  python
  2. __license__   = 'GPL v3'
  3.  
  4. __copyright__ = '2010, Szing'
  5. __docformat__ = 'restructuredtext en'
  6.  
  7. '''
  8. globeandmail.com
  9. '''
  10.  
  11. import re
  12.  
  13. from calibre.web.feeds.news import BasicNewsRecipe
  14.  
  class AdvancedUserRecipe1287083651(BasicNewsRecipe):
      """Calibre news recipe for The Globe and Mail (theglobeandmail.com).

      The recipe is declarative: it sets class attributes that the
      ``BasicNewsRecipe`` base class reads (feed list, clean-up rules, CSS)
      and overrides ``print_version`` so that articles are fetched from the
      site's ``?service=mobile`` rendition instead of the regular web page.
      """

      title          = u'Globe & Mail'
      __author__ = 'Kovid Goyal'
      # NOTE(review): presumably an age limit in days -- confirm against the
      # BasicNewsRecipe documentation.
      oldest_article = 2
      no_stylesheets = True
      max_articles_per_feed = 100
      encoding               = 'utf8'
      publisher              = 'Globe & Mail'
      language               = 'en_CA'
      extra_css = 'p.meta {font-size:75%}\n .redtext {color: red;}\n .byline {font-size: 70%}'

      # (section title, RSS URL) pairs; every feed uses the site's
      # ``?service=rss`` endpoint.
      feeds          = [
        (u'Top National Stories', u'http://www.theglobeandmail.com/news/national/?service=rss'),
        (u'Business', u'http://www.theglobeandmail.com/report-on-business/?service=rss'),
        (u'Commentary', u'http://www.theglobeandmail.com/report-on-business/commentary/?service=rss'),
        (u'Blogs', u'http://www.theglobeandmail.com/blogs/?service=rss'),
        (u'Facts & Arguments', u'http://www.theglobeandmail.com/life/facts-and-arguments/?service=rss'),
        (u'Technology', u'http://www.theglobeandmail.com/news/technology/?service=rss'),
        (u'Investing', u'http://www.theglobeandmail.com/globe-investor/?service=rss'),
        # NOTE(review): 'Polical' looks like a typo for 'Political'; the title
        # is kept unchanged here because it is a displayed runtime string.
        (u'Top Polical Stories', u'http://www.theglobeandmail.com/news/politics/?service=rss'),
        (u'Arts', u'http://www.theglobeandmail.com/news/arts/?service=rss'),
        (u'Life', u'http://www.theglobeandmail.com/life/?service=rss'),
        (u'Real Estate', u'http://www.theglobeandmail.com/real-estate/?service=rss'),
        (u'Sports', u'http://www.theglobeandmail.com/sports/?service=rss'),
        (u'Drive', u'http://www.theglobeandmail.com/auto/?service=rss')
      ]

      # (compiled pattern, replacement callable) pairs. re.DOTALL lets '.'
      # match newlines so that multi-line elements are matched as a whole.
      preprocess_regexps = [
          # Replace the whole <head> element with an empty one.
          (re.compile(r'<head.*?</head>', re.DOTALL), lambda m: '<head></head>'),
          # Drop every <script> element.
          (re.compile(r'<script.*?</script>', re.DOTALL), lambda m: ''),
          ]

      remove_tags_before = dict(name='h1')
      remove_tags = [
              # Sharing widgets and the "top stories" box.
              dict(name='div', attrs={'id':['ShareArticles', 'topStories']}),
              # Any element whose href contains 'tracking='; the ``x and``
              # guard keeps the test safe when the href value is empty/None.
              dict(href=lambda x: x and 'tracking=' in x),
              # Page furniture identified by CSS class.
              {'class':['articleTools', 'pagination', 'Ads', 'topad',
                  'breadcrumbs', 'footerNav', 'footerUtil', 'downloadlinks']}]

      #Use the mobile version rather than the web version
      def print_version(self, url):
          """Return the mobile-rendition URL for the article at *url*.

          Everything from the last ``?`` onwards is replaced by
          ``?service=mobile``. When *url* contains no ``?`` at all, the suffix
          is appended to the unmodified URL (``rpartition`` would otherwise
          yield an empty head and the article address would be lost).
          """
          base, sep, _query = url.rpartition('?')
          if not sep:
              # No query string present: keep the full URL as the base.
              base = url
          return base + '?service=mobile'
  57.  
  58.