home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / reuters.recipe < prev    next >
Text File  |  2011-09-09  |  3KB  |  57 lines

  1. import re
  2.  
  3. from calibre.web.feeds.news import BasicNewsRecipe
  4.  
  5.  
  6. class Reuters(BasicNewsRecipe):
  7.  
  8.     title = 'Reuters'
  9.     description = 'Global news'
  10.     __author__ = 'Kovid Goyal and Sujata Raman'
  11.     use_embedded_content   = False
  12.     language = 'en'
  13.  
  14.     max_articles_per_feed = 10
  15.     no_stylesheets = True
  16.     remove_javascript = True
  17.  
  18.     extra_css      = '''
  19.                         body{font-family:arial,helvetica,sans;}
  20.                         h1{ font-size:larger ; font-weight:bold;  }
  21.                         .byline{color:#006E97;font-size:x-small; font-weight:bold;}
  22.                         .location{font-size:x-small; font-weight:bold;}
  23.                         .timestamp{font-size:x-small; }
  24.                      '''
  25.  
  26.     keep_only_tags = [dict(name='div', attrs={'class':'column2 gridPanel grid8'})]
  27.  
  28.     remove_tags = [dict(name='div', attrs={'id':['recommendedArticles','relatedNews','relatedVideo','relatedFactboxes']}),
  29.                    dict(name='p', attrs={'class':['relatedTopics']}),
  30.                    dict(name='a', attrs={'id':['fullSizeLink']}),
  31.                    dict(name='div', attrs={'class':['photoNav','relatedTopicButtons','articleComments','gridPanel grid8','footerHalf gridPanel grid1','gridPanel grid2','gridPanel grid3']}),
  32.                    # Remove the Tweet, Share this, Email and Print links below article title too!
  33.                    dict(name='div', attrs={'class':['columnRight']}),
  34.                   ]
  35.  
  36.     preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
  37.     [
  38.         ##(r'<HEAD>.*?</HEAD>' , lambda match : '<HEAD></HEAD>'),
  39.         (r'<div id="apple-rss-sidebar-background">.*?<!-- start Entries -->', lambda match : ''),
  40.         (r'<!-- end apple-rss-content-area -->.*?</body>', lambda match : '</body>'),
  41.         (r'<script.*?>.*?</script>', lambda match : ''),
  42.         (r'<body>.*?<div class="contentBand">', lambda match : '<body>'),
  43.         (r'<h3>Share:</h3>.*?</body>', lambda match : '<!-- END:: Shared Module id=36615 --></body>'),
  44.         (r'<div id="atools" class="articleTools">.*?<div class="linebreak">', lambda match : '<div class="linebreak">'),
  45.     ]]
  46.  
  47.  
  48.     feeds = [ ('Top Stories', 'http://feeds.reuters.com/reuters/topNews?format=xml'),
  49.               ('US News', 'http://feeds.reuters.com/reuters/domesticNews?format=xml'),
  50.               ('World News', 'http://feeds.reuters.com/reuters/worldNews?format=xml'),
  51.               ('Politics News', 'http://feeds.reuters.com/reuters/politicsNews?format=xml'),
  52.               ('Science News', 'http://feeds.reuters.com/reuters/scienceNews?format=xml'),
  53.               ('Environment News', 'http://feeds.reuters.com/reuters/Environment?format=xml'),
  54.               ('Technology News', 'http://feeds.reuters.com/reuters/technologyNews?format=xml'),
  55.               ('Oddly Enough News', 'http://feeds.reuters.com/reuters/oddlyEnoughNews?format=xml')
  56.             ]
  57.