Chip 2011 November

home *** CD-ROM | disk | FTP | other *** search

/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / rstones.recipe < prev next >

Wrap

Text File | 2011-09-09 | 3KB | 83 lines

#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'Tony Stegall'
__copyright__ = '2010, Tony Stegall or Tonythebookworm on mobileread.com'
__version__ = 'v1.01'
__date__ = '07, October 2010'
__description__ = 'Rolling Stones Mag'

'''
http://www.rollingstone.com
'''

from calibre.web.feeds.news import BasicNewsRecipe


class RollingStones(BasicNewsRecipe):
    '''
    Feed-driven recipe for rollingstone.com: news, blogs and the movie,
    album and song review feeds listed in ``feeds``.
    '''

    # Descriptive metadata for the recipe.
    __author__ = 'Tony Stegall'
    description = 'Rolling Stones Mag'
    cover_url = 'http://gallery.celebritypro.com/data/media/648/kid-rock-rolling-stone-cover.jpg'
    masthead_url = 'http://origin.myfonts.com/s/ec/cc-200804/Rolling_Stone-logo.gif'
    title = 'Rolling Stones Mag'
    category = 'Music Reviews, Movie Reviews, entertainment news'
    language = 'en'
    timefmt = '[%a, %d %b, %Y]'

    # Fetch settings read by BasicNewsRecipe (see the calibre recipe API
    # documentation for the exact semantics and units of each option).
    oldest_article = 15
    max_articles_per_feed = 25
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True

    #####################################################################################
    # cleanup section                                                                   #
    #####################################################################################
    # Containers whose content is kept from each downloaded page.
    keep_only_tags = [
        dict(name='div', attrs={'class': ['c65l']}),
        dict(name='div', attrs={'id': ['col1']}),
    ]

    # Elements dropped from the kept content.
    remove_tags = [
        dict(name='div', attrs={'class': ['storyActions upper', 'storyActions lowerArticleNav']}),
        dict(name='div', attrs={'id': ['comments', 'related']}),
    ]

    # (section title, RSS feed URL) pairs.
    feeds = [
        (u'News', u'http://www.rollingstone.com/siteServices/rss/allNews'),
        (u'Blogs', u'http://www.rollingstone.com/siteServices/rss/allBlogs'),
        (u'Movie Reviews', u'http://www.rollingstone.com/siteServices/rss/movieReviews'),
        (u'Album Reviews', u'http://www.rollingstone.com/siteServices/rss/albumReviews'),
        (u'Song Reviews', u'http://www.rollingstone.com/siteServices/rss/songReviews'),
    ]

    def get_article_url(self, article):
        '''
        Return the feed entry's 'guid' value as the article URL, or None
        when the entry has no 'guid'.
        '''
        return article.get('guid', None)

    def append_page(self, soup, appendtag, position):
        '''
        Some of the articles are multipage, so this function follows the
        <next> pager link and splices the following page's story text
        into ``appendtag``.

        soup      -- parsed page that is searched for the "next" pager item
        appendtag -- tag that receives the next page's text container
        position  -- index within ``appendtag`` at which it is inserted

        Returns None. Pages without a usable "next" link, or whose
        follow-up page has no 'storyTextContainer' div, are left
        untouched instead of raising.
        '''
        pager = soup.find('li', attrs={'class': 'next'})
        if pager is None:
            return

        # Guard against a "next" item with no anchor or no href; indexing
        # it directly would raise and abort the article.
        link = pager.a
        nexturl = link.get('href') if link is not None else None
        if not nexturl:
            return

        soup2 = self.index_to_soup(nexturl)
        texttag = soup2.find('div', attrs={'id': 'storyTextContainer'})
        if texttag is None:
            # Follow-up page has no story container: nothing to append.
            return

        # Strip inline styles from the text that is about to be merged.
        for it in texttag.findAll(style=True):
            del it['style']

        # Recurse before moving the container so that any later pages are
        # appended to the end of this page's text first.
        newpos = len(texttag.contents)
        self.append_page(soup2, texttag, newpos)
        texttag.extract()
        appendtag.insert(position, texttag)