home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / epicurious.recipe < prev    next >
Encoding:
Text File  |  2011-09-09  |  2.2 KB  |  59 lines

  1. #!/usr/bin/env  python
  2.  
  3. __license__   = 'GPL v3'
  4. __copyright__ = '2010, Starson17'
  5. '''
  6. www.epicurious.com
  7. '''
  8. import re
  9. from calibre.web.feeds.news import BasicNewsRecipe
  10.  
  11. class Epicurious(BasicNewsRecipe):
  12.     title          = u'Epicurious'
  13.     __author__  = 'Starson17'
  14.     description = 'Food and Recipes from Epicurious'
  15.     cover_url     = 'http://up6.podbean.com/image-logos/21849_logo.jpg'
  16.     publisher      = 'Epicurious'
  17.     tags           = 'news, food, gourmet, recipes'
  18.     language = 'en'
  19.     use_embedded_content    = False
  20.     no_stylesheets        = True
  21.     remove_javascript = True
  22.     recursions = 3
  23.     oldest_article        = 14
  24.     max_articles_per_feed = 20
  25.  
  26.     keep_only_tags = [dict(name='div', attrs={'class':['mainconsolewrapper','videoheader','content_unit','entry-content','see_more_block']}),
  27.                       dict(name='div', attrs={'id':['headline','introBlock','ingredients','preparation','articleContent','in_categories_block']})
  28.                            ]
  29.  
  30.     remove_tags = [{'id':['printShoppingList','addnoteLnk','btnUploadVideo','enlarge_image']},
  31.                    {'class':['subLnk','sbmWrapper','detail_division','entry-footer','comment-footer']},
  32.                    dict(name='div', attrs={'class':['tagged','comments']})
  33.                    ]
  34.  
  35.     remove_tags_after = [dict(name='div', attrs={'class':'entry-content'})]
  36.  
  37.     feeds = [
  38.              (u'Recipes: Healthy dinner ', u'http://feeds.epicurious.com/healthy_recipes'),
  39.              (u'New Recipes ', u'http://feeds.epicurious.com/newrecipes'),
  40.              (u'Features ', u'http://feeds.epicurious.com/latestfeatures'),
  41.              (u'Blogs ', u'http://feeds.feedburner.com/epicurious/epiblog')
  42.              ]
  43.  
  44.     match_regexps = [
  45.                      r'http://www.epicurious.com/.*recipes/.*/views'
  46.                      ]
  47.  
  48.     preprocess_regexps = [
  49.         (re.compile(r'/\n', re.DOTALL|re.IGNORECASE), lambda match: '/'),
  50.         (re.compile(r'_116.jpg', re.DOTALL|re.IGNORECASE), lambda match: '.jpg'),
  51.         (re.compile('<div class=\"comments\".*</body>', re.DOTALL|re.IGNORECASE), lambda match: '</body>')
  52.         ]
  53.  
  54.     def postprocess_html(self, soup, first_fetch):
  55.         for t in soup.findAll(['table', 'tr', 'td']):
  56.             t.name = 'div'
  57.         return soup
  58.  
  59.