Chip 2011 November

home *** CD-ROM | disk | FTP | other *** search

/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / cnetjapan_digital.recipe < prev next >

Wrap

Text File | 2011-09-09 | 2KB | 50 lines

import re

from calibre.web.feeds.news import BasicNewsRecipe


class CNetJapanDigital(BasicNewsRecipe):
    """Calibre news recipe for the CNET Japan "digital" RSS feed.

    The class only configures attributes read by ``BasicNewsRecipe`` and
    overrides ``parse_feeds`` to drop articles whose URL points at
    ``pheedo.jp``.
    """

    title = u'CNET Japan Digital'
    oldest_article = 3
    max_articles_per_feed = 30
    __author__ = 'Hiroshi Miura'

    feeds = [
        (u'CNet digital', u'http://feed.japan.cnet.com/rss/digital/index.rdf'),
    ]
    language = 'ja'
    encoding = 'Shift_JIS'
    remove_javascript = True

    # Each entry is (compiled pattern, replacement callable).
    #
    # The patterns are written with a plain ``u''`` prefix instead of
    # ``ur''``: they contain no backslashes, so the string value is the same,
    # and ``ur''`` is a syntax error on Python 3.
    #
    # NOTE(review): as written, the first two patterns start with ``.*`` under
    # DOTALL, so a match runs from the start of the document through the last
    # ``</body>``, and the third pattern matches any run of non-newline
    # characters. The leading/trailing markers these patterns were presumably
    # anchored on appear to be missing -- confirm against the upstream recipe
    # before relying on them.
    preprocess_regexps = [
        (re.compile(u'.*</body>', re.DOTALL | re.IGNORECASE | re.UNICODE),
         lambda match: '</body>'),
        (re.compile(r'.*</body>', re.DOTALL | re.IGNORECASE),
         lambda match: '</body>'),
        (re.compile(u'.*', re.UNICODE),
         lambda match: ''),
    ]

    remove_tags_before = dict(id="contents_l")
    remove_tags = [
        {'class': "social_bkm_share"},
        {'class': "social_bkm_print"},
        {'class': "block20 clearfix"},
        dict(name="div", attrs={'id': 'bookreview'}),
        {'class': "tag_left_ttl"},
        {'class': "tag_right"},
    ]
    remove_tags_after = {'class': "block20"}

    def parse_feeds(self):
        """Return the parsed feeds with ``pheedo.jp`` articles removed.

        Delegates to ``BasicNewsRecipe.parse_feeds`` and then filters each
        feed's ``articles`` list in place, so any other reference to that
        list sees the filtered contents.

        Returns:
            The feed objects produced by the base class, with every article
            whose ``url`` contains ``pheedo.jp`` dropped.
        """
        # Dot is escaped so only the literal host fragment matches.
        # NOTE(review): these are presumably ad/redirect entries -- confirm.
        pheedo_url = re.compile(r'pheedo\.jp')

        feeds = BasicNewsRecipe.parse_feeds(self)
        for feed in feeds:
            # Single pass with slice assignment: keeps the same list object
            # and avoids the per-item index() lookup. ``or ''`` keeps an
            # article that has no URL instead of raising TypeError.
            feed.articles[:] = [
                article for article in feed.articles
                if not pheedo_url.search(article.url or '')
            ]
        return feeds