home *** CD-ROM | disk | FTP | other *** search
- #!/usr/bin/env python
-
- __license__ = 'GPL v3'
- __copyright__ = '2010, Starson17'
- '''
- www.epicurious.com
- '''
- import re
- from calibre.web.feeds.news import BasicNewsRecipe
-
class Epicurious(BasicNewsRecipe):
    """Calibre news recipe for www.epicurious.com.

    The class only declares configuration attributes that are read by
    ``BasicNewsRecipe`` and one HTML post-processing hook; all fetching
    logic lives in the base class.  The exact meaning of each attribute
    is defined by the calibre recipe API, not by this file.
    """

    # --- Publication metadata -------------------------------------------
    title = u'Epicurious'
    __author__ = 'Starson17'
    description = 'Food and Recipes from Epicurious'
    cover_url = 'http://up6.podbean.com/image-logos/21849_logo.jpg'
    publisher = 'Epicurious'
    tags = 'news, food, gourmet, recipes'
    language = 'en'

    # --- Fetch behaviour ------------------------------------------------
    # NOTE(review): units/semantics of the numeric limits below come from
    # BasicNewsRecipe (presumably link depth, days and article count) --
    # confirm against the calibre documentation before tuning them.
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True
    recursions = 3
    oldest_article = 14
    max_articles_per_feed = 20

    # --- Page clean-up --------------------------------------------------
    # Selectors for the <div> containers to retain, by class and by id.
    keep_only_tags = [
        dict(name='div', attrs={'class': [
            'mainconsolewrapper',
            'videoheader',
            'content_unit',
            'entry-content',
            'see_more_block',
        ]}),
        dict(name='div', attrs={'id': [
            'headline',
            'introBlock',
            'ingredients',
            'preparation',
            'articleContent',
            'in_categories_block',
        ]}),
    ]

    # Selectors for page furniture (print/upload links, share widgets,
    # footers, tag lists, comments) to strip.
    remove_tags = [
        {'id': [
            'printShoppingList',
            'addnoteLnk',
            'btnUploadVideo',
            'enlarge_image',
        ]},
        {'class': [
            'subLnk',
            'sbmWrapper',
            'detail_division',
            'entry-footer',
            'comment-footer',
        ]},
        dict(name='div', attrs={'class': ['tagged', 'comments']}),
    ]

    remove_tags_after = [dict(name='div', attrs={'class': 'entry-content'})]

    # --- Sources --------------------------------------------------------
    # (feed title, feed URL) pairs.  The trailing space in each title is
    # kept exactly as published in the original recipe.
    feeds = [
        (u'Recipes: Healthy dinner ', u'http://feeds.epicurious.com/healthy_recipes'),
        (u'New Recipes ', u'http://feeds.epicurious.com/newrecipes'),
        (u'Features ', u'http://feeds.epicurious.com/latestfeatures'),
        (u'Blogs ', u'http://feeds.feedburner.com/epicurious/epiblog'),
    ]

    # URL pattern for recipe "views" pages.  The host name dots are
    # escaped so they match literal dots only.
    match_regexps = [
        r'http://www\.epicurious\.com/.*recipes/.*/views',
    ]

    # (compiled pattern, replacement callable) pairs applied to the raw
    # page text.
    preprocess_regexps = [
        # Drop a newline that directly follows a slash.
        (re.compile(r'/\n', re.DOTALL | re.IGNORECASE),
         lambda match: '/'),
        # Rewrite "..._116.jpg" to "....jpg" (presumably selecting a
        # different image size -- confirm).  The dot is escaped so only a
        # literal ".jpg" extension matches.
        (re.compile(r'_116\.jpg', re.DOTALL | re.IGNORECASE),
         lambda match: '.jpg'),
        # Cut everything from the comments <div> through the closing
        # </body> tag, leaving just "</body>".  DOTALL lets ".*" span
        # line breaks.
        (re.compile(r'<div class="comments".*</body>', re.DOTALL | re.IGNORECASE),
         lambda match: '</body>'),
    ]

    def postprocess_html(self, soup, first_fetch):
        """Rename every ``table``, ``tr`` and ``td`` element to ``div``.

        :param soup: parsed document whose ``findAll`` method is used to
            locate the table elements; it is modified in place.
        :param first_fetch: unused by this implementation.
        :return: the same ``soup`` object, after renaming.
        """
        for table_part in soup.findAll(['table', 'tr', 'td']):
            table_part.name = 'div'
        return soup
-
-