# home *** CD-ROM | disk | FTP | other *** search
# / Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / bbc.recipe < prev    next >
# Text File  |  2011-09-09  |  4KB  |  62 lines

  1. __license__   = 'GPL v3'
  2. __copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
  3. '''
  4. news.bbc.co.uk
  5. '''
  6. import re
  7. from calibre.web.feeds.recipes import BasicNewsRecipe
  8.  
  9. class BBC(BasicNewsRecipe):
  10.     title                  = 'BBC News'
  11.     __author__             = 'Darko Miletic, Starson17'
  12.     description            = 'News from UK. '
  13.     oldest_article         = 2
  14.     max_articles_per_feed  = 100
  15.     no_stylesheets         = True
  16.     #delay                  = 1
  17.     use_embedded_content   = False
  18.     encoding               = 'utf8'
  19.     publisher              = 'BBC'
  20.     category               = 'news, UK, world'
  21.     language               = 'en_GB'
  22.     publication_type       = 'newsportal'
  23.     extra_css              = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
  24.     preprocess_regexps     = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
  25.     conversion_options = {
  26.                              'comments'        : description
  27.                             ,'tags'            : category
  28.                             ,'language'        : language
  29.                             ,'publisher'       : publisher
  30.                             ,'linearize_tables': True
  31.                          }
  32.  
  33.     keep_only_tags    = [
  34.                        dict(name='div', attrs={'class':['layout-block-a layout-block']})
  35.                        ,dict(attrs={'class':['story-body','storybody']})
  36.                         ]
  37.  
  38.     remove_tags = [
  39.                        dict(name='div', attrs={'class':['story-feature related narrow', 'share-help', 'embedded-hyper',
  40.                                                     'story-feature wide ', 'story-feature narrow']}),
  41.                        dict(id=['hypertab', 'comment-form']),
  42.                         ]
  43.  
  44.     remove_attributes = ['width','height']
  45.  
  46.     feeds          = [
  47.                       ('News Front Page', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/front_page/rss.xml'),
  48.                       ('Science/Nature', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/science/nature/rss.xml'),
  49.                       ('Technology', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/technology/rss.xml'),
  50.                       ('Entertainment', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/entertainment/rss.xml'),
  51.                       ('Magazine', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/uk_news/magazine/rss.xml'),
  52.                       ('Business', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/business/rss.xml'),
  53.                       ('Health', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/health/rss.xml'),
  54.                       ('Americas', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/americas/rss.xml'),
  55.                       ('Europe', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/europe/rss.xml'),
  56.                       ('South Asia', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/south_asia/rss.xml'),
  57.                       ('UK', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/uk_news/rss.xml'),
  58.                       ('Asia-Pacific', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/asia-pacific/rss.xml'),
  59.                       ('Africa', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/africa/rss.xml'),
  60.                     ]
  61.  
  62.