home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / big_oven.recipe < prev    next >
Text File  |  2011-09-09  |  3KB  |  74 lines

  1. from calibre.web.feeds.news import BasicNewsRecipe
  2. import re
  3.  
  4. class BigOven(BasicNewsRecipe):
  5.     title               = 'BigOven'
  6.     __author__          = 'Starson17'
  7.     description         = 'Recipes for the Foodie in us all. Registration is free. A fake username and password just gives smaller photos.'
  8.     language            = 'en'
  9.     category            = 'news, food, recipes, gourmet'
  10.     publisher           = 'Starson17'
  11.     use_embedded_content= False
  12.     no_stylesheets      = True
  13.     oldest_article      = 24
  14.     remove_javascript   = True
  15.     remove_empty_feeds    = True
  16.     cover_url           = 'http://www.software.com/images/products/BigOven%20Logo_177_216.JPG'
  17.     max_articles_per_feed = 30
  18.     needs_subscription = True
  19.  
  20.     conversion_options = {'linearize_tables'  : True
  21.                         , 'comment'           : description
  22.                         , 'tags'              : category
  23.                         , 'publisher'         : publisher
  24.                         , 'language'          : language
  25.                         }
  26.  
  27.     def get_browser(self):
  28.         br = BasicNewsRecipe.get_browser()
  29.         if self.username is not None and self.password is not None:
  30.             br.open('http://www.bigoven.com/account/login?ReturnUrl=/')
  31.             br.select_form(nr=1)
  32.             br['Email']  = self.username
  33.             br['Password'] = self.password
  34.             br.submit()
  35.         return br
  36.  
  37.     remove_attributes = ['style', 'font']
  38.  
  39.     def get_article_url(self, article):
  40.         url = article.get('feedburner_origlink',article.get('link', None))
  41.         front, middle, end = url.partition('comhttp//www.bigoven.com')
  42.         url = front + 'com' + end
  43.         return url
  44.  
  45.     keep_only_tags = [dict(name='div', attrs={'id':['nosidebar_main']})]
  46.  
  47.     remove_tags_after = [dict(name='div', attrs={'class':['display-field']})]
  48.  
  49.     remove_tags =  [dict(name='ul', attrs={'class':['tabs']})]
  50.  
  51.     preprocess_regexps = [
  52.         (re.compile(r'Want detailed nutrition information?', re.DOTALL), lambda match: ''),
  53.         (re.compile('\(You could win \$100 in our ', re.DOTALL), lambda match: ''),
  54.          ]
  55.  
  56.     def preprocess_html(self, soup):
  57.         for tag in soup.findAll(name='a', text=re.compile(r'.*View Metric.*', re.DOTALL)):
  58.             tag.parent.parent.extract()
  59.         for tag in soup.findAll(text=re.compile(r'.*Try BigOven Pro for Free.*', re.DOTALL)):
  60.             tag.extract()
  61.         for tag in soup.findAll(text=re.compile(r'.*Add my photo of this recipe.*', re.DOTALL)):
  62.             tag.parent.extract()
  63.         for tag in soup.findAll(name='a', text=re.compile(r'.*photo contest.*', re.DOTALL)):
  64.             tag.parent.extract()
  65.         for tag in soup.findAll(name='a', text='Remove ads'):
  66.             tag.parent.parent.extract()
  67.         for tag in soup.findAll(name='ol', attrs={'class':['recipe-tags']}):
  68.             tag.parent.extract()
  69.         return soup
  70.  
  71.     feeds = [(u'Recent Raves', u'http://www.bigoven.com/rss/recentraves'),
  72.                    (u'Recipe Of The Day', u'http://feeds.feedburner.com/bigovencom-RecipeOfTheDay')]
  73.  
  74.