home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / bbc_sport.recipe < prev    next >
Text File  |  2011-09-09  |  4KB  |  66 lines

  1. __license__   = 'GPL v3'
  2. __copyright__ = '2010, limawhiskey <limawhiskey at gmail.com>'
  3. '''
  4. news.bbc.co.uk/sport/
  5. '''
  6. import re
  7. from calibre.web.feeds.recipes import BasicNewsRecipe
  8.  
  9. class BBC(BasicNewsRecipe):
  10.     title                  = 'BBC Sport'
  11.     __author__             = 'limawhiskey, Darko Miletic, Starson17'
  12.     description            = 'Sports news from UK. A fast version that does not download pictures'
  13.     oldest_article         = 2
  14.     max_articles_per_feed  = 100
  15.     no_stylesheets         = True
  16.     use_embedded_content   = False
  17.     encoding               = 'utf8'
  18.     publisher              = 'BBC'
  19.     category               = 'sport, news, UK, world'
  20.     language               = 'en_GB'
  21.     publication_type       = 'newsportal'
  22.     extra_css              = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
  23.     preprocess_regexps     = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
  24.     conversion_options = {
  25.                              'comments'        : description
  26.                             ,'tags'            : category
  27.                             ,'language'        : language
  28.                             ,'publisher'       : publisher
  29.                             ,'linearize_tables': True
  30.                          }
  31.  
  32.     keep_only_tags  = [
  33.                        dict(name='div', attrs={'class':['ds','mxb']}),
  34.                        dict(attrs={'class':['story-body','storybody']})
  35.                       ]
  36.  
  37.     remove_tags     = [
  38.                        dict(name='div', attrs={'class':['storyextra', 'share-help', 'embedded-hyper', \
  39.                        'story-feature wide ', 'story-feature narrow', 'cap', 'caption', 'q1', 'sihf', \
  40.                        'mva', 'videoInStoryC', 'sharesb', 'mvtb']}),
  41.                        dict(name=['img']), dict(name=['br'])
  42.                       ]
  43.  
  44.     remove_attributes = ['width','height']
  45.  
  46.     feeds          = [
  47.                       ('Sport Front Page', 'http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/front_page/rss.xml'),
  48.                       ('Football', 'http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/football/rss.xml'),
  49.                       ('Cricket', 'http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/cricket/rss.xml'),
  50.                       ('Formula 1', 'http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/motorsport/formula_one/rss.xml'),
  51.                       ('Commonwealth Games', 'http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/commonwealth_games/delhi_2010/rss.xml'),
  52.                       ('Golf', 'http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/golf/rss.xml'),
  53.                       ('Rugby Union', 'http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/rugby_union/rss.xml'),
  54.                       ('Rugby League', 'http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/rugby_league/rss.xml'),
  55.                       ('Tennis', 'http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/tennis/rss.xml'),
  56.                       ('Motorsport', 'http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/motorsport/rss.xml'),
  57.                       ('Boxing', 'http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/boxing/rss.xml'),
  58.                       ('Athletics', 'http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/athletics/rss.xml'),
  59.                       ('Snooker', 'http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/other_sports/snooker/rss.xml'),
  60.                       ('Horse Racing', 'http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/other_sports/horse_racing/rss.xml'),
  61.                       ('Cycling', 'http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/other_sports/cycling/rss.xml'),
  62.                       ('Disability Sport', 'http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/other_sports/disability_sport/rss.xml'),
  63.                       ('Other Sport', 'http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/other_sports/rss.xml'),
  64.                       ('Olympics 2012', 'http://newsrss.bbc.co.uk/rss/sportonline_uk_edition/olympics/london_2012/rss.xml'),
  65.                      ]
  66.