Chip 2011 November

home *** CD-ROM | disk | FTP | other *** search

/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / instapaper.recipe < prev next >

Wrap

Text File | 2011-09-09 | 3KB | 71 lines

from calibre.web.feeds.news import BasicNewsRecipe


class AdvancedUserRecipe1299694372(BasicNewsRecipe):
    """Recipe that logs in to Instapaper and collects the saved articles.

    Two listing pages are scraped (the unread list and the starred list);
    every article link found on them is fetched through the site's own
    text view, and the page ``<title>`` is used as the article title.
    """

    title = u'Instapaper'
    __author__ = 'Darko Miletic'
    publisher = 'Instapaper.com'
    category = 'info, custom, Instapaper'
    oldest_article = 365
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_javascript = True
    # Page furniture (reader controls, scripts, bottom bar) stripped from
    # every downloaded page.
    remove_tags = [
        dict(name='div', attrs={'id': 'text_controls_toggle'}),
        dict(name='script'),
        dict(name='div', attrs={'id': 'text_controls'}),
        dict(name='div', attrs={'id': 'editing_controls'}),
        dict(name='div', attrs={'class': 'bar bottom'}),
    ]
    use_embedded_content = False
    needs_subscription = True
    INDEX = u'http://www.instapaper.com'
    LOGIN = INDEX + u'/user/login'
    # (section title, listing page URL) pairs walked by parse_index().
    feeds = [
        (u'Instapaper Unread', u'http://www.instapaper.com/u'),
        (u'Instapaper Starred', u'http://www.instapaper.com/starred'),
    ]

    def get_browser(self):
        """Return a browser, logged in when a username is configured.

        The first form on the login page is filled with the username and,
        if one is set, the password, and then submitted.
        """
        # NOTE(review): called without an instance, exactly as the original
        # did; confirm this matches get_browser's signature in the calibre
        # version this recipe targets.
        br = BasicNewsRecipe.get_browser()
        if self.username is None:
            return br
        br.open(self.LOGIN)
        br.select_form(nr=0)
        br['username'] = self.username
        if self.password is not None:
            br['password'] = self.password
        br.submit()
        return br

    def parse_index(self):
        """Build the list of ``(feed title, articles)`` tuples.

        Each listing page is downloaded and the first link inside every
        ``div.cornerControls`` element becomes an article entry holding
        only its ``url``; titles are filled in later from the article page.
        """
        totalfeeds = []
        for feedtitle, feedurl in self.get_feeds():
            self.report_progress(
                0, 'Fetching feed' + ' %s...' % (feedtitle or feedurl))
            articles = []
            soup = self.index_to_soup(feedurl)
            for item in soup.findAll('div', attrs={'class': 'cornerControls'}):
                atag = item.a
                if atag is None:
                    continue
                href = atag.get('href')
                # Skip anchors without a usable target; an empty href would
                # otherwise resolve to the bare site root.
                if not href:
                    continue
                articles.append({'url': href})
            totalfeeds.append((feedtitle, articles))
        return totalfeeds

    def print_version(self, url):
        """Turn a link taken from a listing page into an absolute URL.

        Site-relative links are prefixed with ``INDEX``; links that are
        already absolute are returned unchanged instead of being prefixed
        a second time.
        """
        if url.startswith(('http://', 'https://')):
            return url
        return self.INDEX + url

    def _page_title(self, soup):
        """Return the stripped text of the page ``<title>``, or None.

        None is returned when the page has no ``<title>`` element or the
        element is empty, so callers can skip title handling instead of
        failing on the lookup.
        """
        title_tag = soup.find('title')
        if title_tag is None or not title_tag.contents:
            return None
        return title_tag.contents[0].strip()

    def populate_article_metadata(self, article, soup, first):
        """Set the article title from the downloaded page's ``<title>``.

        The existing title is left untouched when the page offers no
        non-empty title.
        """
        page_title = self._page_title(soup)
        if page_title:
            article.title = page_title

    def postprocess_html(self, soup, first_fetch):
        """Prepend an ``<h1>`` heading with the page title to the story.

        The heading is inserted at the start of every element whose id is
        ``story``. Pages without a usable title are returned unchanged.
        """
        page_title = self._page_title(soup)
        if not page_title:
            return soup
        # Built once; the markup is inserted as a plain string, as before.
        heading = '<h1>' + page_title + '</h1>'
        for story_tag in soup.findAll(attrs={"id": "story"}):
            story_tag.insert(0, heading)
        return soup