home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / psych.recipe < prev    next >
Text File  |  2011-09-09  |  2KB  |  45 lines

  1.  
  2. from calibre.ptempfile import PersistentTemporaryFile
  3. from calibre.web.feeds.news import BasicNewsRecipe
  4.  
  5. class AdvancedUserRecipe1275708473(BasicNewsRecipe):
  6.     title          = u'Psychology Today'
  7.     _author__ = 'rty'
  8.     publisher = u'www.psychologytoday.com'
  9.     category = u'Psychology'
  10.     max_articles_per_feed = 100
  11.     remove_javascript = True
  12.     use_embedded_content   = False
  13.     no_stylesheets = True
  14.     language = 'en'
  15.     temp_files = []
  16.     articles_are_obfuscated = True
  17.     remove_tags = [
  18.                     dict(name='div', attrs={'class':['print-source_url','field-items','print-footer']}),
  19.                     dict(name='span', attrs={'class':'print-footnote'}),
  20.                   ]
  21.     remove_tags_before  = dict(name='h1', attrs={'class':'print-title'})
  22.     remove_tags_after     = dict(name='div', attrs={'class':['field-items','print-footer']})
  23.  
  24.     feeds          = [(u'Contents', u'http://www.psychologytoday.com/articles/index.rss')]
  25.  
  26.     def get_article_url(self, article):
  27.        return article.get('link',  None)
  28.  
  29.     def get_obfuscated_article(self, url):
  30.         br = self.get_browser()
  31.         br.open(url)
  32.         response = br.follow_link(url_regex = r'/print/[0-9]+', nr = 0)
  33.         html = response.read()
  34.         self.temp_files.append(PersistentTemporaryFile('_fa.html'))
  35.         self.temp_files[-1].write(html)
  36.         self.temp_files[-1].close()
  37.         return self.temp_files[-1].name
  38.  
  39.     def get_cover_url(self):
  40.         index = 'http://www.psychologytoday.com/magazine/'
  41.         soup = self.index_to_soup(index)
  42.         for image in soup.findAll('img',{ "class" : "imagefield imagefield-field_magazine_cover" }):
  43.               return image['src'] + '.jpg'
  44.         return None
  45.