home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / scientific_american.recipe < prev    next >
Text File  |  2011-09-09  |  4KB  |  92 lines

  1. #!/usr/bin/env  python
  2. __license__   = 'GPL v3'
  3.  
  4. import re
  5. from calibre.web.feeds.news import BasicNewsRecipe
  6.  
  7. class ScientificAmerican(BasicNewsRecipe):
  8.     title                 = u'Scientific American'
  9.     description           = u'Popular Science. Monthly magazine.'
  10.     category              = 'science'
  11.     __author__            = 'Starson17'
  12.     no_stylesheets        = True
  13.     use_embedded_content  = False
  14.     language              = 'en'
  15.     publisher             = 'Nature Publishing Group'
  16.     remove_empty_feeds    = True
  17.     remove_javascript     = True
  18.     oldest_article        = 30
  19.     max_articles_per_feed = 100
  20.  
  21.     conversion_options = {'linearize_tables'  : True
  22.                         , 'comment'           : description
  23.                         , 'tags'              : category
  24.                         , 'publisher'         : publisher
  25.                         , 'language'          : language
  26.                         }
  27.  
  28.     keep_only_tags = [
  29.                 dict(name='h2', attrs={'class':'articleTitle'})
  30.                 ,dict(name='p', attrs={'id':'articleDek'})
  31.                 ,dict(name='p', attrs={'class':'articleInfo'})
  32.                 ,dict(name='div', attrs={'id':['articleContent']})
  33.                 ,dict(name='img', attrs={'src':re.compile(r'/media/inline/blog/Image/', re.DOTALL|re.IGNORECASE)}) 
  34.                 ]
  35.  
  36.     remove_tags = [dict(name='a', attrs={'class':'tinyCommentCount'})]
  37.  
  38.     def parse_index(self):
  39.         soup = self.index_to_soup('http://www.scientificamerican.com/sciammag/')
  40.         issuetag = soup.find('p',attrs={'id':'articleDek'})
  41.         self.timefmt = ' [%s]'%(self.tag_to_string(issuetag))
  42.         img = soup.find('img', alt='Scientific American Magazine', src=True)
  43.         if img is not None:
  44.             self.cover_url = img['src']
  45.         features, feeds = [], []
  46.         for a in soup.find(attrs={'class':'primaryCol'}).findAll('a',attrs={'title':'Feature'}):
  47.             if a is None: continue
  48.             desc = ''
  49.             s = a.parent.parent.find(attrs={'class':'dek'})
  50.             desc = self.tag_to_string(s)
  51.             article = {
  52.                     'url' : a['href'],
  53.                     'title' : self.tag_to_string(a),
  54.                     'date' : '',
  55.                     'description' : desc,
  56.                     }
  57.             features.append(article)
  58.         feeds.append(('Features', features))
  59.         department = []
  60.         title = None
  61.         for li in soup.find(attrs={'class':'secondaryCol'}).findAll('li'):
  62.             if 'department.cfm' in li.a['href']:
  63.                 if department:
  64.                     feeds.append((title, department))
  65.                 title = self.tag_to_string(li.a)
  66.                 department = []
  67.             if 'article.cfm' in li.h3.a['href']:
  68.                 article = {
  69.                         'url' : li.h3.a['href'],
  70.                         'title' : self.tag_to_string(li.h3.a),
  71.                         'date': '',
  72.                         'description': self.tag_to_string(li.p),
  73.                     }
  74.                 department.append(article)
  75.         if department:
  76.             feeds.append((title, department))
  77.         return feeds
  78.  
  79.     def postprocess_html(self, soup, first_fetch):
  80.         for item in soup.findAll('a'):
  81.             if 'topic.cfm' in item['href']:
  82.                 item.replaceWith(item.string)
  83.         return soup
  84.  
  85.     extra_css = '''
  86.                 p{font-weight: normal; font-size:small}
  87.                 li{font-weight: normal; font-size:small}
  88.                 .headline p{font-size:x-small; font-family:Arial,Helvetica,sans-serif;}
  89.                 h2{font-size:large; font-family:Arial,Helvetica,sans-serif;}
  90.                 h3{font-size:x-small;font-family:Arial,Helvetica,sans-serif;}
  91.                 '''
  92.