home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / telegraph_uk.recipe < prev    next >
Text File  |  2011-09-09  |  4KB  |  68 lines

  1. __license__   = 'GPL v3'
  2. __copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
  3. '''
  4. telegraph.co.uk
  5. '''
  6.  
  7. from calibre.web.feeds.news import BasicNewsRecipe
  8.  
  9. class TelegraphUK(BasicNewsRecipe):
  10.     title                 = 'Telegraph.co.uk'
  11.     __author__            = 'Darko Miletic and Sujata Raman'
  12.     description           = 'News from United Kingdom'
  13.     oldest_article        = 2
  14.     category              = 'news, politics, UK'
  15.     publisher             = 'Telegraph Media Group ltd.'    
  16.     max_articles_per_feed = 100
  17.     no_stylesheets        = True
  18.     language              = 'en_GB'
  19.     remove_empty_feeds    = True
  20.     use_embedded_content  = False
  21.  
  22.     extra_css           = '''
  23.                         h1{font-family :Arial,Helvetica,sans-serif; font-size:large; }
  24.                         h2{font-family :Arial,Helvetica,sans-serif; font-size:x-small; color:#444444;}
  25.                         .story{font-family :Arial,Helvetica,sans-serif; font-size: x-small;}
  26.                         .byline{color:#666666; font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
  27.                         a{color:#234B7B; }
  28.                         .imageExtras{color:#666666; font-family :Arial,Helvetica,sans-serif; font-size: xx-small;}
  29.                         '''
  30.  
  31.     conversion_options = {
  32.                           'comment'   : description
  33.                         , 'tags'      : category
  34.                         , 'publisher' : publisher
  35.                         , 'language'  : language
  36.                         }
  37.                         
  38.                         
  39.     keep_only_tags      = [
  40.                            dict(name='div', attrs={'class':['storyHead','byline']})
  41.                           ,dict(name='div', attrs={'id':'mainBodyArea'           })
  42.                           ]
  43.     remove_tags         = [dict(name='div', attrs={'class':['related_links_inline',"imgindex","next","prev","gutterUnder",'ssImgHide','imageExtras','ssImg hide','related_links_video']})
  44.                           ,dict(name='ul' , attrs={'class':['shareThis shareBottom']})
  45.                           ,dict(name='span', attrs={'class':['num','placeComment']})
  46.                           ]
  47.  
  48.     feeds               = [
  49.                          (u'UK News'        , u'http://www.telegraph.co.uk/news/uknews/rss'                                      )
  50.                         ,(u'World News'     , u'http://www.telegraph.co.uk/news/worldnews/rss'                                   )
  51.                         ,(u'Politics'       , u'http://www.telegraph.co.uk/news/newstopics/politics/rss'                         )
  52.                         ,(u'Finance'        , u'http://www.telegraph.co.uk/finance/rss'                                          )
  53.                         ,(u'Technology News', u'http://www.telegraph.co.uk/scienceandtechnology/technology/technologynews/rss'   )
  54.                         ,(u'UK News'        , u'http://www.telegraph.co.uk/scienceandtechnology/technology/technologyreviews/rss')
  55.                         ,(u'Science News'   , u'http://www.telegraph.co.uk/scienceandtechnology/science/sciencenews/rss'         )
  56.                         ,(u'Sport'          , u'http://www.telegraph.co.uk/sport/rss'                                            )
  57.                         ,(u'Earth News'     , u'http://www.telegraph.co.uk/earth/earthnews/rss'                                  )
  58.                         ,(u'Comment'        , u'http://www.telegraph.co.uk/comment/rss'                                          )
  59.                         ,(u'Travel'        , u'http://www.telegraph.co.uk/travel/rss'                                            )
  60.                         ,(u'How about that?', u'http://www.telegraph.co.uk/news/newstopics/howaboutthat/rss'                     )
  61.                          ]
  62.  
  63.     def get_article_url(self, article):
  64.         url = article.get('link', None)
  65.         if 'picture-galleries' in url or 'pictures' in url or 'picturegalleries' in url :
  66.             url = None
  67.         return url
  68.