home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / medscape.recipe < prev    next >
Text File  |  2011-09-09  |  2KB  |  65 lines

  1. #!/usr/bin/env  python
  2. __license__   = 'GPL v3'
  3. __author__    = 'Tony Stegall'
  4. __copyright__ = '2010, Tony Stegall or Tonythebookworm on mobileread.com'
  5. __version__   = '1'
  6. __date__      = '01, October 2010'
  7. __docformat__ = 'English'
  8.  
  9.  
  10. from calibre.web.feeds.recipes import BasicNewsRecipe
  11.  
  12. class MedScrape(BasicNewsRecipe):
  13.  
  14.     title       = 'MedScape'
  15.     __author__  = 'Tony Stegall'
  16.     description = 'Nursing News'
  17.     language    = 'en'
  18.     timefmt = ' [%a, %d %b, %Y]'
  19.     needs_subscription = True
  20.     masthead_url = 'http://images.medscape.com/pi/global/header/sp/bg-sp-medscape.gif'
  21.     no_stylesheets = True
  22.     remove_javascript   = True
  23.     conversion_options = {'linearize_tables' : True}
  24.     extra_css = '''
  25.                     h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
  26.  
  27.  
  28.                     p.authors{text-align:right; font-size:small;margin-top:0px;margin-bottom: 0px;}
  29.                     p.postingdate{text-align:right; font-size:small;margin-top:0px;margin-bottom: 0px;}
  30.                     h2{text-align:right; font-size:small;margin-top:0px;margin-bottom: 0px;}
  31.  
  32.  
  33.                     p{font-family:Helvetica,Arial,sans-serif;font-size:small;}
  34.                 '''
  35.  
  36.     remove_tags = [dict(name='div', attrs={'class':['closewindow2']}),
  37.                    dict(name='div', attrs={'id': ['basicheaderlinks']})
  38.                   ]
  39.  
  40.     def get_browser(self):
  41.         br = BasicNewsRecipe.get_browser()
  42.         if self.username is not None and self.password is not None:
  43.             br.open('https://profreg.medscape.com/px/getlogin.do')
  44.             br.select_form(name='LoginForm')
  45.             br['userId']   = self.username
  46.             br['password'] = self.password
  47.             br.submit()
  48.         return br
  49.  
  50.     feeds = [
  51.               ('MedInfo', 'http://www.medscape.com/cx/rssfeeds/2685.xml'),
  52.             ]
  53.  
  54.     def print_version(self,url):
  55.         #the original url is: http://www.medscape.com/viewarticle/728955?src=rss
  56.         #the print    url is: http://www.medscape.com/viewarticle/728955_print
  57.          print_url = url.partition('?')[0] +'_print'
  58.          #print 'the printable version is: ',print_url
  59.          return print_url
  60.  
  61.     def preprocess_html(self, soup):
  62.         for item in soup.findAll(attrs={'style':True}):
  63.             del item['style']
  64.         return soup
  65.