home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / inc.recipe < prev    next >
Text File  |  2011-09-09  |  2KB  |  73 lines

  1. from calibre.web.feeds.news import BasicNewsRecipe
  2. import re
  3.  
  4. class IncMagazineRecipe(BasicNewsRecipe):
  5.     __license__  = 'GPL v3'
  6.     __author__ = 'kwetal'
  7.     language = 'en'
  8.     version = 1
  9.  
  10.     title = u'Inc Magazine'
  11.     publisher = u'Mansueto Ventures LLC'
  12.     category = u'News, Business'
  13.     description = u'Handbook of the American Entrepeneur'
  14.  
  15.     use_embedded_content = False
  16.     remove_empty_feeds = True
  17.  
  18.     no_stylesheets = True
  19.     remove_javascript = True
  20.  
  21.     INDEX = 'http://www.inc.com/magazine'
  22.  
  23.     remove_tags = []
  24.     remove_tags.append(dict(name = 'div', attrs = {'id' : 'advt'}))
  25.  
  26.     extra_css = '''
  27.                 body{font-family:verdana,arial,helvetica,geneva,sans-serif ;}
  28.                 div#deck {font-weight: bold;}
  29.                 div.byline {font-size: x-small; color: #696969; margin-top: 0.4em;}
  30.                 '''
  31.  
  32.     def parse_index(self):
  33.         soup = self.index_to_soup(self.INDEX)
  34.         self.browser.open(self.INDEX)
  35.  
  36.         url = self.browser.geturl()
  37.         date = url.rpartition('/')[0].rpartition('/')[2]
  38.         self.title = self.title + ' ' + date[4:6] + ', ' + date[0:4]
  39.  
  40.         answer = []
  41.  
  42.         for feature in soup.findAll('div', attrs = {'class': re.compile('magazinesection.*')}):
  43.             h2 = feature.find('h2')
  44.             if h2:
  45.                 feedTitle = self.tag_to_string(h2)
  46.             else:
  47.                 img = feature.find('img', attrs = {'class': 'howtohead'})
  48.                 if img:
  49.                     feedTitle = img['alt']
  50.                 else:
  51.                     feedTitle = 'Unknown Feature'
  52.  
  53.             articles = []
  54.             for div in feature.findAll('div', attrs = {'class': re.compile('article.*|column.*')}):
  55.                 h3 = div.find('h3')
  56.                 title = self.tag_to_string(h3)
  57.                 href = h3.a['href'].replace('.html', '_Printer_Friendly.html')
  58.                 p = div.find('p', attrs = {'class': 'deck'})
  59.                 description = self.tag_to_string(p)
  60.  
  61.                 articles.append({'title': title, 'date': u'', 'url': href, 'description': description})
  62.  
  63.             answer.append((feedTitle, articles))
  64.  
  65.         return answer
  66.  
  67.     def preprocess_html(self, soup):
  68.         img = soup.find('img', attrs = {'src': 'http://images.inc.com/nav/lofi_logo.gif'})
  69.         if img:
  70.             img.parent.extract()
  71.  
  72.         return soup
  73.