home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / technology_review.recipe < prev    next >
Text File  |  2011-09-09  |  2KB  |  67 lines

  1. import string
  2. from calibre.web.feeds.news import BasicNewsRecipe
  3.  
  4. class TechnologyReview(BasicNewsRecipe):
  5.     title          = u'Technology Review'
  6.     __author__ = 'rty'
  7.     description = 'MIT Technology Magazine'
  8.     publisher = 'Technology Review Inc.'
  9.     category = 'Technology, Innovation, R&D'
  10.     language = 'en'
  11.     oldest_article = 14
  12.     max_articles_per_feed = 100
  13.     No_stylesheets = True
  14.     extra_css = """
  15.     .ArticleBody {font: normal; text-align: justify}
  16.     .headline {font: bold x-large}
  17.     .subheadline {font: italic large}
  18.     """
  19.     feeds = [
  20.     (u'Computing', u'http://feeds.technologyreview.com/technology_review_Computing'),
  21.     (u'Web', u'http://feeds.technologyreview.com/technology_review_Web'),
  22.     (u'Communications', u'http://feeds.technologyreview.com/technology_review_Communications'),
  23.     (u'Energy', u'http://feeds.technologyreview.com/technology_review_Energy'),
  24.     (u'Materials', u'http://feeds.technologyreview.com/technology_review_Materials'),
  25.     (u'Biomedicine', u'http://feeds.technologyreview.com/technology_review_Biotech'),
  26.     (u'Business', u'http://feeds.technologyreview.com/technology_review_Biztech')
  27.     ]
  28.     remove_attributes = ['width', 'align','cellspacing']
  29.  
  30.     remove_tags = [
  31.                     dict(name='div', attrs={'id':['CloseLink','footerAdDiv','copyright']}),
  32.                          ]
  33.     remove_tags_after = [dict(name='div', attrs={'id':'copyright'})]
  34.  
  35.     def get_article_url(self, article):
  36.         return article.get('guid', article.get('id', None))
  37.  
  38.     def print_version(self, url):
  39.         baseurl='http://www.technologyreview.com/printer_friendly_article.aspx?id='
  40.         split1 = string.split(url,"/")
  41.         xxx=split1 [4]
  42.         split2= string.split(xxx,"/")
  43.         s =  baseurl + split2[0]
  44.         return s
  45.  
  46.  
  47.     def postprocess_html(self,soup, True):
  48.         #remove picture
  49.         headerhtml = soup.find(True, {'class':'header'})
  50.         headerhtml.replaceWith("")
  51.  
  52.         #remove close button
  53.         closehtml = soup.find(True, {'class':'close'})
  54.         closehtml.replaceWith("")
  55.  
  56.         #remove banner advertisement
  57.         bannerhtml = soup.find(True, {'class':'bannerad'})
  58.         bannerhtml.replaceWith("")
  59.  
  60.         #thanks kiklop74!  This code removes all links from the text
  61.         for alink in soup.findAll('a'):
  62.             if alink.string is not None:
  63.                tstr = alink.string
  64.                alink.replaceWith(tstr)
  65.  
  66.         return soup
  67.