home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Chip 2011 November
/
CHIP_2011_11.iso
/
Programy
/
Narzedzia
/
Calibre
/
calibre-0.8.18.msi
/
file_280
/
nejm.recipe
< prev
next >
Wrap
Text File
|
2011-09-09
|
3KB
|
79 lines
# -*- coding: utf-8 -*-
from calibre.web.feeds.recipes import BasicNewsRecipe
class NYTimes(BasicNewsRecipe):
title = 'New England Journal of Medicine'
__author__ = 'Kovid Goyal'
description = 'Medical news'
timefmt = ' [%d %b, %Y]'
needs_subscription = True
language = 'en'
no_stylesheets = True
keep_only_tags = dict(id='content')
#TO LOGIN
def get_browser(self):
br = BasicNewsRecipe.get_browser()
br.open('http://www.nejm.org/action/showLogin?uri=http://www.nejm.org/')
br.select_form(name='frmLogin')
br['login'] = self.username
br['password'] = self.password
response = br.submit()
raw = response.read()
if '>Sign Out<' not in raw:
raise Exception('Login failed. Check your username and password')
return br
#TO GET ARTICLE TOC
def nejm_get_index(self):
return self.index_to_soup('http://content.nejm.org/current.dtl')
# To parse artice toc
def parse_index(self):
parse_soup = self.nejm_get_index()
feeds = []
div = parse_soup.find(attrs={'class':'tocContent'})
for group in div.findAll(attrs={'class':'articleGrouping'}):
feed_title = group.find(attrs={'class':'articleType'})
if feed_title is None:
continue
feed_title = self.tag_to_string(feed_title)
articles = []
self.log('Found section:', feed_title)
for art in group.findAll(attrs={'class':lambda x: x and 'articleEntry'
in x}):
link = art.find(attrs={'class':lambda x:x and 'articleLink' in
x})
if link is None:
continue
a = link.find('a', href=True)
if a is None:
continue
url = a.get('href')
if url.startswith('/'):
url = 'http://www.nejm.org'+url
title = self.tag_to_string(a)
self.log.info('\tFound article:', title, 'at', url)
article = {'title':title, 'url':url, 'date':''}
au = art.find(attrs={'class':'articleAuthors'})
if au is not None:
article['author'] = self.tag_to_string(au)
desc = art.find(attrs={'class':'hover_text'})
if desc is not None:
desc = self.tag_to_string(desc)
if 'author' in article:
desc = ' by ' + article['author'] + ' ' +desc
article['description'] = desc
articles.append(article)
if articles:
feeds.append((feed_title, articles))
return feeds