home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / the_budget_fashionista.recipe < prev    next >
Text File  |  2011-09-09  |  1KB  |  45 lines

  1. #!/usr/bin/env  python
  2.  
  3. __license__   = 'GPL v3'
  4. __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
  5. '''
  6. www.thebudgetfashionista.com
  7. '''
  8.  
  9. from calibre.web.feeds.recipes import BasicNewsRecipe
  10.  
  11. class TheBudgetFashionista(BasicNewsRecipe):
  12.     title                  = 'The Budget Fashionista'
  13.     __author__             = 'Darko Miletic'
  14.     description            = 'Saving your money since 2003'
  15.     oldest_article         = 7
  16.     max_articles_per_feed  = 100
  17.     no_stylesheets         = True
  18.     use_embedded_content   = False
  19.     encoding               = 'utf-8'
  20.     publisher              = 'TBF GROUP, LLC.'
  21.     category               = 'news, fashion, comsetics, women'
  22.     lang                   = 'en-US'
  23.     language = 'en'
  24.  
  25.  
  26.     conversion_options = {
  27.           'comment'          : description
  28.         , 'tags'             : category
  29.         , 'publisher'        : publisher
  30.         , 'language'         : lang
  31.     }
  32.  
  33.     keep_only_tags = [dict(name='div', attrs={'class':'columnLeft'})]
  34.     remove_tags_after = dict(name='div', attrs={'class':'postDetails'})
  35.     remove_tags = [dict(name=['object','link','script','iframe','form','login-button'])]
  36.  
  37.     feeds = [(u'Articles', u'http://www.thebudgetfashionista.com/feeds/atom/')]
  38.  
  39.     def preprocess_html(self, soup):
  40.         for it in soup.findAll('img'):
  41.             if it.parent.name == 'a':
  42.                it.parent.name = 'div'
  43.         return soup;
  44.  
  45.