Chip 2011 November

home *** CD-ROM | disk | FTP | other *** search

/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / bay_citizen.recipe < prev next >

Wrap

Text File | 2011-09-09 | 2KB | 47 lines

from calibre.web.feeds.news import BasicNewsRecipe


class TheBayCitizen(BasicNewsRecipe):
    """Recipe for The Bay Citizen (baycitizen.org).

    Articles come from the site's story feed. Multi-page stories are
    stitched together by following the "next page" link found on each page.
    """

    title = 'The Bay Citizen'
    language = 'en'
    __author__ = 'noah'
    description = 'The Bay Citizen'
    publisher = 'The Bay Citizen'
    # Site root; prepended to the href of "next page" links in append_page().
    INDEX = u'http://www.baycitizen.org'
    category = 'news'
    oldest_article = 2
    max_articles_per_feed = 20
    no_stylesheets = True
    masthead_url = 'http://media.baycitizen.org/images/layout/logo1.png'

    feeds = [('Main Feed', 'http://www.baycitizen.org/feeds/stories/')]

    keep_only_tags = [dict(name='div', attrs={'class': 'story'})]
    remove_tags = [
        dict(name='div', attrs={'class': 'socialBar'}),
        dict(name='div', attrs={'id': 'text-resize'}),
        dict(name='div', attrs={'class': 'story relatedContent'}),
        dict(name='div', attrs={'id': 'comment_status_loading'}),
    ]

    def append_page(self, soup, appendtag, position):
        """Fetch the continuation page linked from *soup* and splice it in.

        Looks for an ``<a class="stry-next">`` link in *soup*. When present,
        the linked page is downloaded, its ``<div class="body">`` has inline
        styles stripped, any further continuation pages are appended to that
        div recursively, and the div is finally inserted into *appendtag* at
        index *position*.

        Does nothing when there is no next-page link, or when the linked
        page does not contain the expected body container.
        """
        pager = soup.find('a', attrs={'class': 'stry-next'})
        if pager is None:
            return

        nexturl = self.INDEX + pager['href']
        soup2 = self.index_to_soup(nexturl)
        texttag = soup2.find('div', attrs={'class': 'body'})
        if texttag is None:
            # The continuation page lacks the expected container (layout
            # change, error page, ...). Stop following pagination here and
            # keep what was collected so far rather than crashing the
            # whole article with an AttributeError.
            return

        for styled in texttag.findAll(style=True):
            del styled['style']

        # Later pages go at the end of this page's body, so the final
        # document keeps the pages in reading order.
        newpos = len(texttag.contents)
        self.append_page(soup2, texttag, newpos)

        # Detach the div from its own document before moving it.
        texttag.extract()
        appendtag.insert(position, texttag)

    def preprocess_html(self, soup):
        """Strip inline styles, merge continuation pages, drop pager markup.

        Returns the same *soup* object, modified in place.
        """
        for item in soup.findAll(style=True):
            del item['style']

        # NOTE(review): index 3 is inherited from the original recipe;
        # presumably it places continuation text after the first page's
        # leading nodes in <body> -- confirm against the live page layout.
        self.append_page(soup, soup.body, 3)

        # Pagination controls and "continued from previous page" markers are
        # meaningless once all pages have been merged into one document.
        for trash in soup.findAll(id='story-pagination'):
            trash.extract()
        for trash in soup.findAll('em', 'cont-from-prev'):
            trash.extract()

        return soup