home *** CD-ROM | disk | FTP | other *** search
- from __future__ import with_statement
- __license__ = 'GPL 3'
- __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
-
- import time
- from calibre.web.feeds.news import BasicNewsRecipe
-
class TheHindu(BasicNewsRecipe):
    """Recipe that downloads the current day's print edition of The Hindu.

    The section/article list is scraped from the "today's paper" index page
    in :meth:`parse_index`; the remaining class attributes configure how
    each downloaded article page is cleaned up.
    """

    title = u'The Hindu'
    language = 'en_IN'

    oldest_article = 7
    __author__ = 'Kovid Goyal'
    max_articles_per_feed = 100
    no_stylesheets = True

    # Keep only the element with id "content" from each article page ...
    keep_only_tags = [dict(id='content')]
    # ... and drop these link/navigation/footer elements.
    remove_tags = [
        dict(attrs={'class': ['article-links', 'breadcr']}),
        dict(id=['email-section', 'right-column', 'printfooter']),
    ]

    extra_css = '.photo-caption { font-size: smaller }'

    def postprocess_html(self, soup, first_fetch):
        """Rename every table, tr, td and center tag in *soup* to div.

        The soup is modified in place and also returned. *first_fetch* is
        accepted for interface compatibility and is not used.
        """
        for t in soup.findAll(['table', 'tr', 'td', 'center']):
            t.name = 'div'
        return soup

    def parse_index(self):
        """Build the list of sections and articles for today's paper.

        Fetches the index page for the current local date and walks the
        ``h3`` and ``div`` tags inside the element with id "left-column".
        Each ``h3`` starts a new section; each tag whose class is "tpaper"
        and that contains a link contributes one article to the section
        currently open.

        Returns a list of ``(section_title, articles)`` tuples, where
        ``articles`` is a list of dicts with the keys ``url``, ``title``,
        ``date`` and ``description``. Sections without articles are
        omitted.

        Raises ValueError if the index page has no "left-column" element.
        """
        today = time.strftime('%Y-%m-%d')
        soup = self.index_to_soup(
            'http://www.thehindu.com/todays-paper/tp-index/?date=' + today)
        div = soup.find(id='left-column')
        if div is None:
            # Fail with a clear message instead of an AttributeError on
            # None when the page layout changed or the fetch returned an
            # unexpected page.
            raise ValueError(
                'Could not find the "left-column" element in the index '
                'page of The Hindu for ' + today)

        feeds = []
        current_section = None
        current_articles = []
        for x in div.findAll(['h3', 'div']):
            css_class = x.get('class', '')
            if isinstance(css_class, (list, tuple)):
                # Some parser versions return multi-valued attributes as
                # a list; normalise so the comparison below works for
                # both representations.
                css_class = ' '.join(css_class)
            if current_section and css_class == 'tpaper':
                a = x.find('a', href=True)
                if a is not None:
                    current_articles.append({
                        'url': a['href'] + '?css=print',
                        'title': self.tag_to_string(a),
                        'date': '',
                        'description': '',
                    })
            if x.name == 'h3':
                # A heading closes the previous section and opens a new one.
                if current_section and current_articles:
                    feeds.append((current_section, current_articles))
                current_section = self.tag_to_string(x)
                current_articles = []

        # The last section is not followed by another heading, so it has
        # to be flushed explicitly or its articles would be lost.
        if current_section and current_articles:
            feeds.append((current_section, current_articles))
        return feeds
-
-
-