home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Chip 2011 November
/
CHIP_2011_11.iso
/
Programy
/
Narzedzia
/
Calibre
/
calibre-0.8.18.msi
/
file_280
/
technology_review.recipe
< prev
next >
Wrap
Text File
|
2011-09-09
|
2KB
|
67 lines
import string
from calibre.web.feeds.news import BasicNewsRecipe
class TechnologyReview(BasicNewsRecipe):
    """calibre news recipe for MIT Technology Review.

    Pulls articles from the per-topic RSS feeds listed in ``feeds``,
    downloads the printer-friendly version of each article and strips
    page chrome (header image, close button, banner ad) and hyperlinks.
    """

    # --- Recipe metadata ---
    title = u'Technology Review'
    __author__ = 'rty'
    description = 'MIT Technology Magazine'
    publisher = 'Technology Review Inc.'
    category = 'Technology, Innovation, R&D'
    language = 'en'

    # --- Download limits ---
    oldest_article = 14
    max_articles_per_feed = 100

    # calibre looks this attribute up in lowercase; the former spelling
    # ``No_stylesheets`` was silently ignored.
    no_stylesheets = True
    extra_css = """
    .ArticleBody {font: normal; text-align: justify}
    .headline {font: bold x-large}
    .subheadline {font: italic large}
    """

    # (section title, feed URL) pairs.
    feeds = [
        (u'Computing', u'http://feeds.technologyreview.com/technology_review_Computing'),
        (u'Web', u'http://feeds.technologyreview.com/technology_review_Web'),
        (u'Communications', u'http://feeds.technologyreview.com/technology_review_Communications'),
        (u'Energy', u'http://feeds.technologyreview.com/technology_review_Energy'),
        (u'Materials', u'http://feeds.technologyreview.com/technology_review_Materials'),
        (u'Biomedicine', u'http://feeds.technologyreview.com/technology_review_Biotech'),
        (u'Business', u'http://feeds.technologyreview.com/technology_review_Biztech'),
    ]

    # --- HTML clean-up rules ---
    remove_attributes = ['width', 'align', 'cellspacing']
    remove_tags = [
        dict(name='div', attrs={'id': ['CloseLink', 'footerAdDiv', 'copyright']}),
    ]
    remove_tags_after = [dict(name='div', attrs={'id': 'copyright'})]

    def get_article_url(self, article):
        """Return the article's ``guid``, else its ``id``, else ``None``."""
        return article.get('guid', article.get('id', None))

    def print_version(self, url):
        """Build the printer-friendly URL for an article URL.

        The fifth ``/``-separated piece of ``url`` is used as the article
        id (for ``http://host/section/<id>/...`` that is ``<id>``).
        NOTE(review): assumes feed guids always have this shape - confirm
        against the live feeds.

        Raises:
            IndexError: if ``url`` has fewer than five ``/``-separated
                pieces (same as the original behaviour).
        """
        baseurl = 'http://www.technologyreview.com/printer_friendly_article.aspx?id='
        # str.split replaces string.split(), which no longer exists on
        # Python 3. A second split on "/" was dropped: the piece cannot
        # contain "/" after the first split.
        article_id = url.split('/')[4]
        return baseurl + article_id

    def postprocess_html(self, soup, first_fetch):
        """Strip page chrome and hyperlinks from a downloaded article.

        Args:
            soup: parsed article HTML (a BeautifulSoup-style tree).
            first_fetch: passed positionally by the caller; unused here.
                (Was named ``True``, which is a syntax error on Python 3.)

        Returns:
            The same ``soup`` object, modified in place.
        """
        # Remove the header picture, the close button and the banner
        # advertisement. Any of them may be missing from a given page, so
        # skip those that are not found instead of failing on None.
        for css_class in ('header', 'close', 'bannerad'):
            tag = soup.find(True, {'class': css_class})
            if tag is not None:
                tag.replaceWith("")

        # thanks kiklop74! This code removes all links from the text.
        # Only links whose content is a single string are replaced by
        # that string; links with nested markup are left untouched.
        for alink in soup.findAll('a'):
            if alink.string is not None:
                alink.replaceWith(alink.string)
        return soup