Chip 2011 November

home *** CD-ROM | disk | FTP | other *** search

/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / hindu.recipe < prev next >

Wrap

Text File | 2011-09-09 | 1.7 KB | 51 lines

from __future__ import with_statement __license__ = 'GPL 3' __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>' import time from calibre.web.feeds.news import BasicNewsRecipe class TheHindu(BasicNewsRecipe): title = u'The Hindu' language = 'en_IN' oldest_article = 7 __author__ = 'Kovid Goyal' max_articles_per_feed = 100 no_stylesheets = True keep_only_tags = [dict(id='content')] remove_tags = [dict(attrs={'class':['article-links', 'breadcr']}), dict(id=['email-section', 'right-column', 'printfooter'])] extra_css = '.photo-caption { font-size: smaller }' def postprocess_html(self, soup, first_fetch): for t in soup.findAll(['table', 'tr', 'td','center']): t.name = 'div' return soup def parse_index(self): today = time.strftime('%Y-%m-%d') soup = self.index_to_soup( 'http://www.thehindu.com/todays-paper/tp-index/?date=' + today) div = soup.find(id='left-column') feeds = [] current_section = None current_articles = [] for x in div.findAll(['h3', 'div']): if current_section and x.get('class', '') == 'tpaper': a = x.find('a', href=True) if a is not None: current_articles.append({'url':a['href']+'?css=print', 'title':self.tag_to_string(a), 'date': '', 'description':''}) if x.name == 'h3': if current_section and current_articles: feeds.append((current_section, current_articles)) current_section = self.tag_to_string(x) current_articles = [] return feeds