home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / physics_today.recipe < prev    next >
Text File  |  2011-09-09  |  2KB  |  40 lines

  1. import re
  2. from calibre.web.feeds.news import BasicNewsRecipe
  3. from calibre import strftime
  4.  
  5. class Physicstoday(BasicNewsRecipe):
  6.     title          = u'Physicstoday'
  7.     __author__  = 'Hypernova'
  8.     description           = u'Physics Today magazine'
  9.     publisher             = 'American Institute of Physics'
  10.     category              = 'Physics'
  11.     language              = 'en'
  12.     cover_url = strftime('http://ptonline.aip.org/journals/doc/PHTOAD-home/jrnls/images/medcover%m_%Y.jpg')
  13.     oldest_article = 30
  14.     max_articles_per_feed = 100
  15.     no_stylesheets        = True
  16.     use_embedded_content  = False
  17.     needs_subscription = True
  18.     remove_javascript     = True
  19.     remove_tags_before = dict(name='h1')
  20.     remove_tags =  [dict(name='div', attrs={'class':'highslide-footer'})]
  21.     remove_tags =  [dict(name='div', attrs={'class':'highslide-header'})]
  22.     #remove_tags =  [dict(name='a', attrs={'class':'highslide'})]
  23.     preprocess_regexps = [
  24.    #(re.compile(r'<!--start PHTOAD_tail.jsp -->.*</body>', re.DOTALL|re.IGNORECASE),
  25.    (re.compile(r'<!-- END ARTICLE and footer section -->.*</body>', re.DOTALL|re.IGNORECASE),
  26.     lambda match: '</body>'),
  27. ]
  28.  
  29.     def get_browser(self):
  30.         br = BasicNewsRecipe.get_browser()
  31.         if self.username is not None and self.password is not None:
  32.             br.open('http://ptonline.aip.org/journals/doc/PHTOAD-home/pt_login.jsp?fl=f')
  33.             br.select_form(name='login_form')
  34.             br['username'] = self.username
  35.             br['password'] = self.password
  36.             br.submit()
  37.         return br
  38.  
  39.     feeds          = [(u'All', u'http://www.physicstoday.org/feed.xml')]
  40.