home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / lrb_payed.recipe < prev    next >
Encoding:
Text File  |  2011-09-09  |  3.1 KB  |  76 lines

  1.  
  2. __license__   = 'GPL v3'
  3. __copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
  4. '''
  5. lrb.co.uk
  6. '''
  7. from calibre import strftime
  8. from calibre.web.feeds.news import BasicNewsRecipe
  9.  
  10. class LondonReviewOfBooksPayed(BasicNewsRecipe):
  11.     title                 = 'London Review of Books'
  12.     __author__            = 'Darko Miletic'
  13.     description           = 'Subscription content. Literary review publishing essay-length book reviews and topical articles on politics, literature, history, philosophy, science and the arts by leading writers and thinkers'
  14.     category              = 'news, literature, UK'
  15.     publisher             = 'LRB Ltd.'
  16.     max_articles_per_feed = 100
  17.     language              = 'en_GB'
  18.     no_stylesheets        = True
  19.     delay                 = 1
  20.     use_embedded_content  = False
  21.     encoding              = 'utf-8'
  22.     INDEX                 = 'http://www.lrb.co.uk'
  23.     LOGIN                 = INDEX + '/login'
  24.     masthead_url          = INDEX + '/assets/images/lrb_logo_big.gif'
  25.     needs_subscription    = True
  26.     publication_type      = 'magazine'
  27.     extra_css             = ' body{font-family: Georgia,Palatino,"Palatino Linotype",serif} '
  28.  
  29.  
  30.     def get_browser(self):
  31.         br = BasicNewsRecipe.get_browser()
  32.         if self.username is not None and self.password is not None:
  33.             br.open(self.LOGIN)
  34.             br.select_form(nr=1)
  35.             br['username'] = self.username
  36.             br['password'] = self.password
  37.             br.submit()
  38.         return br
  39.  
  40.     def parse_index(self):
  41.         articles = []
  42.         soup = self.index_to_soup(self.INDEX)
  43.         cover_item = soup.find('p',attrs={'class':'cover'})
  44.         lrbtitle = self.title
  45.         if  cover_item:
  46.             self.cover_url = self.INDEX + cover_item.a.img['src']
  47.             content = self.INDEX + cover_item.a['href']
  48.             soup2 = self.index_to_soup(content)
  49.             sitem = soup2.find(attrs={'class':'article-list'})
  50.             lrbtitle = soup2.head.title.string
  51.             for item in sitem.findAll('a',attrs={'class':'title'}):
  52.                 description = u''
  53.                 title_prefix = u''
  54.                 feed_link = item
  55.                 if feed_link.has_key('href'):
  56.                     url   = self.INDEX + feed_link['href']
  57.                     title = title_prefix + self.tag_to_string(feed_link)
  58.                     date  = strftime(self.timefmt)
  59.                     articles.append({
  60.                                       'title'      :title
  61.                                      ,'date'       :date
  62.                                      ,'url'        :url
  63.                                      ,'description':description
  64.                                     })
  65.         return [(lrbtitle, articles)]
  66.  
  67.     conversion_options = {
  68.                              'comments'  : description
  69.                             ,'tags'      : category
  70.                             ,'language'  : language
  71.                             ,'publisher' : publisher
  72.                          }
  73.  
  74.     keep_only_tags = [dict(name='div' , attrs={'class':['article-body indent','letters']})]
  75.     remove_attributes = ['width','height']
  76.