Chip 2011 November

home *** CD-ROM | disk | FTP | other *** search

/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / xkcd.recipe < prev next >

Wrap

Text File | 2011-09-09 | 1KB | 44 lines

__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''
Fetch xkcd.
'''

import re
import time

from calibre.web.feeds.news import BasicNewsRecipe


class XkcdCom(BasicNewsRecipe):
    """Calibre news recipe that builds its article list from the xkcd archive."""

    title = 'xkcd'
    description = 'A webcomic of romance and math humor.'
    __author__ = 'Martin Pitt'
    language = 'en'

    use_embedded_content = False
    oldest_article = 60
    keep_only_tags = [dict(id='middleContent')]
    remove_tags = [dict(name='ul'), dict(name='h3'), dict(name='br')]
    no_stylesheets = True

    # turn image bubblehelp into a paragraph
    #
    # Group 1 ends right after `title="` and group 3 starts at the closing
    # quote, so the replacement re-emits the matched markup with an empty
    # title attribute and appends the title text as <p>...</p>.
    # NOTE(review): both `.*` parts are greedy and, without re.DOTALL, stop at
    # line ends; on a line holding several tags the paragraph is placed after
    # the last `>` on that line, not directly after the <img> tag.
    preprocess_regexps = [
        (re.compile(r'(<img.*title=")([^"]+)(".*>)'),
         lambda m: '%s%s<p>%s</p>' % (m.group(1), m.group(3), m.group(2)))
    ]

    def parse_index(self):
        """Build the article list from the xkcd archive page.

        Every ``<a>`` element carrying a ``title`` attribute is considered.
        The title is parsed as a ``%Y-%m-%d`` date; anchors whose title is
        not such a date, or that have no ``href``, are skipped so that a
        single unrelated link cannot abort the whole download.

        Returns a one-element list ``[('xkcd', articles)]`` where each
        article is a dict with the keys ``date``, ``timestamp``, ``url``,
        ``title``, ``description`` and ``content``.
        """
        INDEX = 'http://xkcd.com/archive/'

        soup = self.index_to_soup(INDEX)
        articles = []
        for item in soup.findAll('a', title=True):
            date = item['title']
            href = item.get('href')
            if not href:
                # Titled anchor without a target: nothing to download.
                continue
            try:
                # The extra second is kept from the original recipe.
                # NOTE(review): its purpose is not visible here - confirm.
                timestamp = time.mktime(time.strptime(date, '%Y-%m-%d')) + 1
            except (ValueError, OverflowError):
                # Title is not a usable date, so this is not treated as a
                # comic entry.
                continue
            articles.append({
                'date': date,
                'timestamp': timestamp,
                'url': 'http://xkcd.com' + href,
                'title': self.tag_to_string(item),
                'description': '',
                'content': '',
            })

        return [('xkcd', articles)]