home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / scprint.recipe < prev    next >
Text File  |  2011-09-09  |  3KB  |  74 lines

  1. from calibre.web.feeds.news import BasicNewsRecipe, LoginFailed
  2.  
  3. class SCPrintMagazine(BasicNewsRecipe):
  4.     title          = u'SC Print Magazine'
  5.     __author__ = u'Tony Maro'
  6.     description = u'Last print version of the data security magazine'
  7.     INDEX = "http://www.scmagazineus.com/issuearchive/"
  8.     no_stylesheets = True
  9.     language = 'en'
  10.     keep_only_tags = [dict(id=['article','review'])]
  11.     remove_tags = [dict(id=['articlePrintTools','reviewBodyColumn'])]
  12.     LOG_IN = 'http://www.scmagazineus.com/login/'
  13.     tags = 'News,SC Magazine'
  14.     needs_subscription = True
  15.  
  16.     def parse_index(self):
  17.         articles = []
  18.         issuelink = printsections = None
  19.  
  20.         soup = self.index_to_soup(self.INDEX)
  21.         sectit = soup.find('div', attrs={'class':'issueArchiveItem'})
  22.         if sectit is not None:
  23.             linkt = sectit.find('a')
  24.             issuelink = linkt['href']
  25.             imgt = sectit.find('img')
  26.             self.cover_url = imgt['src']
  27.  
  28.         if issuelink is not None:
  29.             issue = self.index_to_soup(issuelink)
  30.             if issue is not None:
  31.                 printsections = issue.findAll('div',attrs={'class':'PrintSection'})
  32.         if printsections is not None:
  33.             for printsection in printsections:
  34.                 onesection = []
  35.                 sectiontitle = printsection.find('h3').contents[0]
  36.                 articlesec = printsection.findAll('div',attrs={'class':'IssueArchiveFormat'})
  37.                 if articlesec is not None:
  38.                     ''' got articles '''
  39.                     for onearticle in articlesec:
  40.                         ''' process one article '''
  41.                         arttitlet = onearticle.find('h3')
  42.                         if arttitlet is not None:
  43.                             mylink = arttitlet.find('a')
  44.                             if mylink is not None:
  45.                                 if mylink.has_key('title'):
  46.                                     arttitle = mylink['title']
  47.                                 else:
  48.                                     arttitle = 'unknown'
  49.                                 if mylink.has_key('href'):
  50.                                     artlink = mylink['href']
  51.                                     artlink = artlink.replace("/article","/printarticle")
  52.                                     artlink = artlink.replace("/review","/printreview")
  53.                                     deck = onearticle.find('div',attrs={'class':'deck'})
  54.                                     if deck is not None:
  55.                                         deck = deck.contents[0]
  56.                                         onesection.append({'title':arttitle, 'url':artlink, 'description':deck,'date':''})
  57.                     articles.append((sectiontitle, onesection))
  58.  
  59.         return articles
  60.  
  61.     def get_browser(self):
  62.         br = BasicNewsRecipe.get_browser(self)
  63.         br.open(self.LOG_IN)
  64.         br.select_form(name='aspnetForm')
  65.         br['ctl00$ctl00$cphAllPageContent$cphMainContent$SubscriberEasyLoginView1$txtEmail'] = self.username
  66.         br['ctl00$ctl00$cphAllPageContent$cphMainContent$SubscriberEasyLoginView1$txtPassword'] = self.password
  67.         raw = br.submit("ctl00$ctl00$cphAllPageContent$cphMainContent$SubscriberEasyLoginView1$btnLogin").read()
  68.         if 'Logout</a>' not in raw:
  69.             raise LoginFailed(
  70.                     _('Failed to log in, check your username and password for'
  71.                     ' the calibre Periodicals service.'))
  72.         return br
  73.  
  74.