Chip 2011 November

home *** CD-ROM | disk | FTP | other *** search

/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / noticias_r7.recipe < prev next >

Wrap

Text File | 2011-09-09 | 2KB | 41 lines

import re

from calibre.web.feeds.news import BasicNewsRecipe


class PortalR7(BasicNewsRecipe):
    """Calibre news recipe for the R7 news portal (publisher: Rede Record).

    The recipe is purely declarative: it lists four RSS feeds from
    www.r7.com, keeps only the ``<div class="materia">`` element of each
    downloaded article page, and strips sharing/navigation widgets from it.
    """

    # ---- Publication metadata -------------------------------------------
    title = 'Noticias R7'
    description = 'Noticias Portal R7'
    __author__ = 'Diniz Bortolotto'
    publisher = 'Rede Record'
    category = 'news, Brazil'
    language = 'pt_BR'
    publication_type = 'newsportal'

    # ---- Download limits and ordering -----------------------------------
    oldest_article = 2
    max_articles_per_feed = 20
    reverse_article_order = True

    # ---- Content handling -----------------------------------------------
    encoding = 'utf8'
    # Articles are fetched from the linked pages rather than taken from
    # the feed entries themselves.
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True
    remove_attributes = ['style']

    # ---- Sources: (section title, RSS URL) pairs ------------------------
    feeds = [
        (u'Brasil',
         u'http://www.r7.com/data/rss/brasil.xml'),
        (u'Economia',
         u'http://www.r7.com/data/rss/economia.xml'),
        (u'Internacional',
         u'http://www.r7.com/data/rss/internacional.xml'),
        (u'Tecnologia e Ci\xeancia',
         u'http://www.r7.com/data/rss/tecnologiaCiencia.xml'),
    ]

    # ---- Page clean-up --------------------------------------------------
    # Only the article container is kept ...
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': 'materia'}},
    ]

    # ... and these elements are removed from inside it.
    remove_tags = [
        {'id': ['espalhe', 'report-erro']},
        {'name': 'ul', 'attrs': {'class': 'controles'}},
        {'name': 'ul', 'attrs': {'class': 'relacionados'}},
        {'name': 'div', 'attrs': {'class': 'materia_banner'}},
        {'name': 'div', 'attrs': {'class': 'materia_controles'}},
    ]

    # Applied to the raw HTML before parsing: everything between the
    # opening "materia" div and the "materia_cabecalho" div is discarded
    # so that the header div directly follows the container's opening tag.
    # NOTE(review): ``.*`` is greedy and DOTALL is set, so the match runs
    # to the LAST "materia_cabecalho" div on the page - confirm pages only
    # ever contain one.
    preprocess_regexps = [
        (
            re.compile(
                r'<div class="materia">.*<div class="materia_cabecalho">',
                re.IGNORECASE | re.DOTALL,
            ),
            lambda _match: '<div class="materia"><div class="materia_cabecalho">',
        ),
    ]