home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / explosm.recipe < prev    next >
Encoding:
Text File  |  2011-09-09  |  2.1 KB  |  55 lines

  1. from calibre.web.feeds.news import BasicNewsRecipe
  2. import re
  3.  
  4. class Explosm(BasicNewsRecipe):
  5.     title              = u'Explosm Rotated'
  6.     __author__        = 'Andromeda Rabbit'
  7.     description      = 'Explosm'
  8.     language            = 'en'
  9.     use_embedded_content = False
  10.     no_stylesheets    = True
  11.     oldest_article    = 24
  12.     remove_javascript   = True
  13.     remove_empty_feeds  = True
  14.     max_articles_per_feed = 10
  15.  
  16.     feeds = [
  17.              (u'Explosm Feed', u'http://feeds.feedburner.com/Explosm')
  18.              ]
  19.  
  20.     #match_regexps = [r'http://www.explosm.net/comics/.*']
  21.  
  22.     keep_only_tags   = [dict(name='img', attrs={'alt':'Cyanide and Happiness, a daily webcomic'})]
  23.     remove_tags = [dict(name='div'), dict(name='span'), dict(name='table'), dict(name='br'), dict(name='nobr'), dict(name='a'), dict(name='b')]
  24.  
  25.     extra_css = '''
  26.                     h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
  27.                     h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
  28.                     p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
  29.                     body{font-family:Helvetica,Arial,sans-serif;font-size:small;}'''
  30.  
  31.     def get_cover_url(self):
  32.         return 'http://cdn.shopify.com/s/files/1/0059/1872/products/cyanidetitle_large.jpg?1295846286'
  33.  
  34.     def parse_feeds(self):
  35.         feeds = BasicNewsRecipe.parse_feeds(self)
  36.  
  37.         for curfeed in feeds:
  38.             delList = []
  39.             for a,curarticle in enumerate(curfeed.articles):
  40.                 if re.search(r'http://www.explosm.net/comics', curarticle.url) == None:
  41.                     delList.append(curarticle)
  42.             if len(delList)>0:
  43.                 for d in delList:
  44.                     index = curfeed.articles.index(d)
  45.                     curfeed.articles[index:index+1] = []
  46.  
  47.         return feeds
  48.  
  49.     def skip_ad_pages(self, soup):
  50.         # Skip ad pages served before actual article
  51.         skip_tag = soup.find(name='img', attrs={'alt':'Cyanide and Happiness, a daily webcomic'})
  52.         if skip_tag is None:
  53.             return soup
  54.         return None
  55.