home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / nejm.recipe < prev    next >
Text File  |  2011-09-09  |  3KB  |  79 lines

  1. # -*- coding: utf-8 -*-
  2. from calibre.web.feeds.recipes import BasicNewsRecipe
  3.  
  4. class NYTimes(BasicNewsRecipe):
  5.  
  6.     title       = 'New England Journal of Medicine'
  7.     __author__  = 'Kovid Goyal'
  8.     description = 'Medical news'
  9.     timefmt = ' [%d %b, %Y]'
  10.     needs_subscription = True
  11.     language = 'en'
  12.  
  13.     no_stylesheets = True
  14.     keep_only_tags = dict(id='content')
  15.  
  16.  
  17.     #TO LOGIN
  18.     def get_browser(self):
  19.         br = BasicNewsRecipe.get_browser()
  20.         br.open('http://www.nejm.org/action/showLogin?uri=http://www.nejm.org/')
  21.         br.select_form(name='frmLogin')
  22.         br['login'] = self.username
  23.         br['password'] = self.password
  24.         response = br.submit()
  25.         raw = response.read()
  26.         if '>Sign Out<' not in raw:
  27.             raise Exception('Login failed. Check your username and password')
  28.         return br
  29.  
  30.     #TO GET ARTICLE TOC
  31.     def nejm_get_index(self):
  32.         return self.index_to_soup('http://content.nejm.org/current.dtl')
  33.  
  34.     # To parse artice toc
  35.     def parse_index(self):
  36.         parse_soup = self.nejm_get_index()
  37.  
  38.         feeds = []
  39.  
  40.         div = parse_soup.find(attrs={'class':'tocContent'})
  41.         for group in div.findAll(attrs={'class':'articleGrouping'}):
  42.             feed_title = group.find(attrs={'class':'articleType'})
  43.             if feed_title is None:
  44.                 continue
  45.             feed_title = self.tag_to_string(feed_title)
  46.             articles = []
  47.             self.log('Found section:', feed_title)
  48.             for art in group.findAll(attrs={'class':lambda x: x and 'articleEntry'
  49.                 in x}):
  50.                 link = art.find(attrs={'class':lambda x:x and 'articleLink' in
  51.                     x})
  52.                 if link is None:
  53.                     continue
  54.                 a = link.find('a', href=True)
  55.                 if a is None:
  56.                     continue
  57.                 url = a.get('href')
  58.                 if url.startswith('/'):
  59.                     url = 'http://www.nejm.org'+url
  60.                 title = self.tag_to_string(a)
  61.                 self.log.info('\tFound article:', title, 'at', url)
  62.                 article = {'title':title, 'url':url, 'date':''}
  63.                 au = art.find(attrs={'class':'articleAuthors'})
  64.                 if au is not None:
  65.                     article['author'] = self.tag_to_string(au)
  66.                 desc = art.find(attrs={'class':'hover_text'})
  67.                 if desc is not None:
  68.                     desc = self.tag_to_string(desc)
  69.                     if 'author' in article:
  70.                         desc = ' by ' + article['author'] + ' ' +desc
  71.                     article['description'] = desc
  72.                 articles.append(article)
  73.             if articles:
  74.                 feeds.append((feed_title, articles))
  75.  
  76.         return feeds
  77.  
  78.  
  79.