home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / rstones.recipe < prev    next >
Text File  |  2011-09-09  |  3KB  |  83 lines

  1. #!/usr/bin/env  python
  2. __license__     = 'GPL v3'
  3. __author__      = 'Tony Stegall'
  4. __copyright__   = '2010, Tony Stegall or Tonythebookworm on mobileread.com'
  5. __version__     = 'v1.01'
  6. __date__        = '07, October 2010'
  7. __description__ = 'Rolling Stones Mag'
  8.  
  9. '''
  10. http://www.rollingstone.com
  11. '''
  12.  
  13. from calibre.web.feeds.news import BasicNewsRecipe
  14.  
  15. class RollingStones(BasicNewsRecipe):
  16.     __author__    = 'Tony Stegall'
  17.     description   = 'Rolling Stones Mag'
  18.     cover_url     = 'http://gallery.celebritypro.com/data/media/648/kid-rock-rolling-stone-cover.jpg'
  19.     masthead_url  = 'http://origin.myfonts.com/s/ec/cc-200804/Rolling_Stone-logo.gif'
  20.  
  21.  
  22.     title          = 'Rolling Stones Mag'
  23.     category       = 'Music Reviews, Movie Reviews, entertainment news'
  24.  
  25.     language       = 'en'
  26.     timefmt        = '[%a, %d %b, %Y]'
  27.  
  28.     oldest_article        = 15
  29.     max_articles_per_feed = 25
  30.     use_embedded_content  = False
  31.     no_stylesheets = True
  32.  
  33.     remove_javascript     = True
  34.     #####################################################################################
  35.     # cleanup section                                                                   #
  36.     #####################################################################################
  37.     keep_only_tags       = [
  38.                             dict(name='div', attrs={'class':['c65l']}),
  39.                             dict(name='div', attrs={'id':['col1']}),
  40.  
  41.  
  42.                            ]
  43.     remove_tags = [
  44.                     dict(name='div', attrs={'class': ['storyActions upper','storyActions lowerArticleNav']}),
  45.                     dict(name='div', attrs={'id': ['comments','related']}),
  46.                   ]
  47.  
  48.  
  49.     feeds          = [
  50.                        (u'News', u'http://www.rollingstone.com/siteServices/rss/allNews'),
  51.                        (u'Blogs', u'http://www.rollingstone.com/siteServices/rss/allBlogs'),
  52.                        (u'Movie Reviews', u'http://www.rollingstone.com/siteServices/rss/movieReviews'),
  53.                        (u'Album Reviews', u'http://www.rollingstone.com/siteServices/rss/albumReviews'),
  54.                        (u'Song Reviews', u'http://www.rollingstone.com/siteServices/rss/songReviews'),
  55.  
  56.  
  57.                      ]
  58.  
  59.  
  60.  
  61.     def get_article_url(self, article):
  62.         return article.get('guid',  None)
  63.  
  64.  
  65.     def append_page(self, soup, appendtag, position):
  66.         '''
  67.         Some are the articles are multipage so the below function
  68.         will get the articles that have <next>
  69.         '''
  70.         pager = soup.find('li',attrs={'class':'next'})
  71.         if pager:
  72.            nexturl = pager.a['href']
  73.            soup2 = self.index_to_soup(nexturl)
  74.            texttag = soup2.find('div', attrs={'id':'storyTextContainer'})
  75.            for it in texttag.findAll(style=True):
  76.                del it['style']
  77.            newpos = len(texttag.contents)
  78.            self.append_page(soup2,texttag,newpos)
  79.            texttag.extract()
  80.            appendtag.insert(position,texttag)
  81.  
  82.  
  83.