home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / forbes_india.recipe < prev    next >
Text File  |  2011-09-09  |  2KB  |  56 lines

  1. from calibre.ptempfile import PersistentTemporaryFile
  2. from calibre.web.feeds.news import BasicNewsRecipe
  3.  
  4. class AdvancedUserRecipe1276934715(BasicNewsRecipe):
  5.     title          = u'Forbes India'
  6.     __author__            = 'rty'
  7.     description           = 'India Edition Forbes'
  8.     publisher             = 'Forbes India'
  9.     category              = 'Business News, Economy, India'
  10.     oldest_article = 7
  11.     max_articles_per_feed = 100
  12.     remove_javascript = True
  13.     use_embedded_content   = False
  14.     no_stylesheets = True
  15.     language = 'en_IN'
  16.     temp_files = []
  17.     articles_are_obfuscated = True
  18.     conversion_options = {'linearize_tables':True}
  19.     feeds          = [
  20.        (u'Contents', u'http://business.in.com/rssfeed/rss_all.xml'),
  21.                 ]
  22.     extra_css = '''
  23.                    .t-10-gy-l{font-style: italic; font-size: small}
  24.                    .t-30-b-d{font-weight: bold; font-size: xx-large}
  25.                    .t-16-gy-l{font-weight: bold; font-size: x-large; font-syle: italic}
  26.                    .storycontent{font-size: 4px;font-family: Times New Roman;}
  27.                 '''
  28.  
  29.     remove_tags_before  = dict(name='div', attrs={'class':'pdl10 pdr15'})
  30.  
  31.  
  32.     def get_obfuscated_article(self, url):
  33.         br = self.get_browser()
  34.         br.open(url)
  35.         response = br.follow_link(url_regex = r'/printcontent/[0-9]+', nr = 0)
  36.         html = response.read()
  37.         self.temp_files.append(PersistentTemporaryFile('_fa.html'))
  38.         self.temp_files[-1].write(html)
  39.         self.temp_files[-1].close()
  40.         return self.temp_files[-1].name
  41.  
  42.     def get_cover_url(self):
  43.         index = 'http://business.in.com/magazine/'
  44.         soup = self.index_to_soup(index)
  45.         for image in soup.findAll('a',{ "class" : "lbOn a-9-b-d" }):
  46.               return image['href']
  47.               #return image['href'] + '.jpg'
  48.         return None
  49.  
  50.     def preprocess_html(self, soup):
  51.         for item in soup.findAll(style=True):
  52.            del item['style']
  53.         for item in soup.findAll(width=True):
  54.             del item['width']
  55.         return soup
  56.