home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / drivelry.recipe < prev    next >
Text File  |  2011-09-09  |  1KB  |  42 lines

  1. from calibre.web.feeds.news import BasicNewsRecipe
  2. from calibre.ebooks.BeautifulSoup import BeautifulSoup
  3.  
  4. class drivelrycom(BasicNewsRecipe):
  5.     title          = u'drivelry.com'
  6.     language       = 'en'
  7.     description    = 'A blog by Mike Abrahams'
  8.     __author__     = 'Krittika Goyal'
  9.     oldest_article = 60 #days
  10.     max_articles_per_feed = 25
  11.     #encoding = 'latin1'
  12.  
  13.     remove_stylesheets = True
  14.     #remove_tags_before = dict(name='h1', attrs={'class':'heading'})
  15.     remove_tags_after  = dict(name='div', attrs={'id':'bookmark'})
  16.     remove_tags = [
  17.        dict(name='iframe'),
  18.        dict(name='div', attrs={'class':['sidebar']}),
  19.        dict(name='div', attrs={'id':['bookmark']}),
  20.        #dict(name='span', attrs={'class':['related_link', 'slideshowcontrols']}),
  21.        #dict(name='ul', attrs={'class':'articleTools'}),
  22.     ]
  23.  
  24.     feeds          = [
  25. ('drivelry.com',
  26.  'http://feeds.feedburner.com/drivelry'),
  27.  
  28. ]
  29.  
  30.     def preprocess_html(self, soup):
  31.         story = soup.find(name='div', attrs={'id':'main'})
  32.         #td = heading.findParent(name='td')
  33.         #td.extract()
  34.         soup = BeautifulSoup('''
  35. <html><head><title>t</title></head><body>
  36. <p>To donate to this blog: <a href="http://www.drivelry.com/thank-you/">click here</a></p>
  37. </body></html>
  38. ''')
  39.         body = soup.find(name='body')
  40.         body.insert(0, story)
  41.         return soup
  42.