home *** CD-ROM | disk | FTP | other *** search
- from calibre.web.feeds.news import BasicNewsRecipe
- import re
-
class Explosm(BasicNewsRecipe):
    """Calibre news recipe for the Explosm (Cyanide and Happiness) feed.

    Articles are taken from the FeedBurner feed, restricted to entries whose
    URL points at an explosm.net comic page, and each downloaded page is
    reduced to the comic image selected by ``keep_only_tags``.
    """

    title = u'Explosm Rotated'
    __author__ = 'Andromeda Rabbit'
    description = 'Explosm'
    language = 'en'
    use_embedded_content = False
    no_stylesheets = True
    oldest_article = 24
    remove_javascript = True
    remove_empty_feeds = True
    max_articles_per_feed = 10

    feeds = [
        (u'Explosm Feed', u'http://feeds.feedburner.com/Explosm')
    ]

    #match_regexps = [r'http://www.explosm.net/comics/.*']

    # ``alt`` text of the comic image; shared by ``keep_only_tags`` and
    # ``skip_ad_pages`` so the two selectors cannot drift apart.
    _COMIC_IMG_ALT = 'Cyanide and Happiness, a daily webcomic'

    keep_only_tags = [dict(name='img', attrs={'alt': _COMIC_IMG_ALT})]
    remove_tags = [
        dict(name='div'),
        dict(name='span'),
        dict(name='table'),
        dict(name='br'),
        dict(name='nobr'),
        dict(name='a'),
        dict(name='b'),
    ]

    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}'''

    def get_cover_url(self):
        """Return the fixed URL of the image used as the cover."""
        return 'http://cdn.shopify.com/s/files/1/0059/1872/products/cyanidetitle_large.jpg?1295846286'

    def parse_feeds(self):
        """Parse the feeds and drop every article that is not a comic page.

        Returns:
            The feed list produced by ``BasicNewsRecipe.parse_feeds``, with
            each feed's ``articles`` list filtered in place so that only
            articles whose URL contains the explosm.net comics prefix remain.
        """
        feeds = BasicNewsRecipe.parse_feeds(self)
        # Dots are escaped so that only the literal host name matches.
        comic_url = re.compile(r'http://www\.explosm\.net/comics')

        for feed in feeds:
            # Slice assignment mutates the existing list object, so any other
            # reference to ``feed.articles`` sees the filtered result too.
            feed.articles[:] = [
                article for article in feed.articles
                if comic_url.search(article.url) is not None
            ]

        return feeds

    def skip_ad_pages(self, soup):
        """Handle pages served in place of the actual comic page.

        Args:
            soup: Parsed document of the downloaded page.

        Returns:
            ``soup`` itself when the page has no comic image, ``None`` when
            the comic image is present.
        """
        # NOTE(review): calibre documents this hook as returning the HTML of
        # the real page for an ad page; here the same soup is handed back
        # instead -- confirm this is the intended behaviour.
        comic_img = soup.find(name='img', attrs={'alt': self._COMIC_IMG_ALT})
        if comic_img is None:
            return soup
        return None
-