home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Chip 2011 November
/
CHIP_2011_11.iso
/
Programy
/
Narzedzia
/
Calibre
/
calibre-0.8.18.msi
/
file_280
/
cyberpresse.recipe
< prev
next >
Wrap
Text File
|
2011-09-09
|
3KB
|
57 lines
from calibre.web.feeds.news import BasicNewsRecipe
class Cyberpresse(BasicNewsRecipe):
title = u'Cyberpresse'
__author__ = 'balok and Sujata Raman'
description = 'Canadian news in French'
language = 'fr'
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
remove_javascript = True
html2lrf_options = ['--left-margin=0','--right-margin=0','--top-margin=0','--bottom-margin=0']
encoding = 'utf-8'
keep_only_tags = [dict(name='div', attrs={'class':'article-page'}),
dict(name='div', attrs={'id':'articlePage'}),
]
extra_css = '''
.photodata{font-family:Arial,Helvetica,Verdana,sans-serif;color: #999999; font-size: 90%; }
h1{font-family:Georgia,Times,serif ; font-size: large; }
.amorce{font-family:Arial,Helvetica,Verdana,sans-serif; font-weight:bold;}
.article-page{font-family:Arial,Helvetica,Verdana,sans-serif; font-size: x-small;}
#articlePage{font-family:Arial,Helvetica,Verdana,sans-serif; font-size: x-small;}
.auteur{font-family:Georgia,Times,sans-serif; font-size: 90%; color:#006699 ;}
.bodyText{font-family:Arial,Helvetica,Verdana,sans-serif; font-size: x-small;}
.byLine{font-family:Arial,Helvetica,Verdana,sans-serif; font-size: 90%;}
.entry{font-family:Arial,Helvetica,Verdana,sans-serif; font-size: x-small;}
.minithumb-auteurs{font-family:Arial,Helvetica,Verdana,sans-serif; font-size: 90%; }
a{color:#003399; font-weight:bold; }
'''
remove_tags = [
dict(name='div', attrs={'class':['centerbar','colspan','share-module']}),
dict(name='p', attrs={'class':['zoom']}),
dict(name='ul', attrs={'class':['stories']}),
dict(name='h4', attrs={'class':['general-cat']}),
]
feeds = [(u'Manchettes', u'http://www.cyberpresse.ca/rss/225.xml'),
(u'Capitale nationale', u'http://www.cyberpresse.ca/rss/501.xml'),
(u'Opinions', u'http://www.cyberpresse.ca/rss/977.xml'),
(u'Insolite', u'http://www.cyberpresse.ca/rss/279.xml')
]
def postprocess_html(self, soup, first):
for tag in soup.findAll(name=['i','strong']):
tag.name = 'div'
return soup