home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / johm.recipe < prev    next >
Text File  |  2011-09-09  |  3KB  |  73 lines

  1. import re
  2. from calibre.web.feeds.recipes import BasicNewsRecipe
  3.  
  4. class JournalofHospitalMedicine(BasicNewsRecipe):
  5.  
  6.     title       = 'Journal of Hospital Medicine'
  7.     __author__  = 'Kovid Goyal'
  8.     description = 'Medical news'
  9.     timefmt = ' [%d %b, %Y]'
  10.     needs_subscription = True
  11.     language = 'en'
  12.  
  13.     no_stylesheets = True
  14.     keep_only_tags = [dict(id=['articleTitle', 'articleMeta', 'fulltext'])]
  15.     remove_tags = [dict(attrs={'class':'licensedContent'})]
  16.  
  17.  
  18.    # TO LOGIN
  19.     def get_browser(self):
  20.         br = BasicNewsRecipe.get_browser()
  21.         br.open('http://www3.interscience.wiley.com/cgi-bin/home')
  22.         br.select_form(nr=0)
  23.         br['j_username'] = self.username
  24.         br['j_password'] = self.password
  25.         response = br.submit()
  26.         raw = response.read()
  27.         if '<h2>LOGGED IN</h2>' not in raw:
  28.             raise Exception('Login failed. Check your username and password')
  29.         return br
  30.  
  31.     #TO GET ARTICLE TOC
  32.     def johm_get_index(self):
  33.         return self.index_to_soup('http://onlinelibrary.wiley.com/journal/10.1002/(ISSN)1553-5606/currentissue')
  34.  
  35.     # To parse artice toc
  36.     def parse_index(self):
  37.         soup = self.johm_get_index()
  38.         toc = soup.find(id='issueTocGroups')
  39.         feeds = []
  40.         for group in toc.findAll('li', id=re.compile(r'group\d+')):
  41.             gtitle = group.find(attrs={'class':'subSectionHeading'})
  42.             if gtitle is None:
  43.                 continue
  44.             gtitle = self.tag_to_string(gtitle)
  45.             arts = group.find(attrs={'class':'articles'})
  46.             if arts is None:
  47.                 continue
  48.             self.log('Found section:', gtitle)
  49.             articles = []
  50.             for art in arts.findAll(attrs={'class':lambda x: x and 'tocArticle'
  51.                 in x}):
  52.                 a = art.find('a', href=True)
  53.                 if a is None:
  54.                     continue
  55.                 url = a.get('href')
  56.                 if url.startswith('/'):
  57.                     url = 'http://onlinelibrary.wiley.com' + url
  58.                 url = url.replace('/abstract', '/full')
  59.                 title = self.tag_to_string(a)
  60.                 a.extract()
  61.                 pm = art.find(attrs={'class':'productMenu'})
  62.                 if pm is not None:
  63.                     pm.extract()
  64.                 desc = self.tag_to_string(art)
  65.                 self.log('\tFound article:', title, 'at', url)
  66.                 articles.append({'title':title, 'url':url, 'description':desc,
  67.                     'date':''})
  68.             if articles:
  69.                 feeds.append((gtitle, articles))
  70.  
  71.         return feeds
  72.  
  73.