home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / businessworldin.recipe < prev    next >
Text File  |  2011-09-09  |  4KB  |  99 lines

  1. __license__   = 'GPL v3'
  2. __copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
  3. '''
  4. www.businessworld.in
  5. '''
  6.  
  7. from calibre import strftime
  8. from calibre.web.feeds.news import BasicNewsRecipe
  9.  
  10. class BusinessWorldMagazine(BasicNewsRecipe):
  11.     title                = 'Business World Magazine'
  12.     __author__           = 'Darko Miletic'
  13.     description          = 'News from India'
  14.     publisher            = 'ABP Pvt Ltd Publication'
  15.     category             = 'news, politics, finances, India, Asia'
  16.     delay                = 1
  17.     no_stylesheets       = True
  18.     INDEX                = 'http://www.businessworld.in/bw/Magazine_Current_Issue'
  19.     ROOT                 = 'http://www.businessworld.in'
  20.     use_embedded_content = False
  21.     encoding             = 'utf-8'
  22.     language             = 'en_IN'
  23.     extra_css            = """
  24.                               img{display: block; margin-bottom: 0.5em}
  25.                               body{font-family: Arial,Helvetica,sans-serif}
  26.                               h2{color: gray; display: block}
  27.                            """
  28.  
  29.     conversion_options = {
  30.                           'comment'          : description
  31.                         , 'tags'             : category
  32.                         , 'publisher'        : publisher
  33.                         , 'language'         : language
  34.                         }
  35.  
  36.     def is_in_list(self,linklist,url):
  37.         for litem in linklist:
  38.             if litem == url:
  39.                return True
  40.         return False
  41.     
  42.     
  43.     def parse_index(self):
  44.         articles = []
  45.         linklist = []
  46.         soup = self.index_to_soup(self.INDEX)
  47.         
  48.         tough = soup.find('div', attrs={'id':'tough'})
  49.         if tough:
  50.            for item in tough.findAll('h1'):
  51.                 description = ''
  52.                 title_prefix = ''
  53.                 feed_link = item.find('a')
  54.                 if feed_link and feed_link.has_key('href'):
  55.                     url   = self.ROOT + feed_link['href']
  56.                     if not self.is_in_list(linklist,url):
  57.                         title = title_prefix + self.tag_to_string(feed_link)
  58.                         date  = strftime(self.timefmt)
  59.                         articles.append({
  60.                                           'title'      :title
  61.                                          ,'date'       :date
  62.                                          ,'url'        :url
  63.                                          ,'description':description
  64.                                         })
  65.                         linklist.append(url)
  66.         
  67.         for item in soup.findAll('div', attrs={'class':'nametitle'}):
  68.             description = ''
  69.             title_prefix = ''
  70.             feed_link = item.find('a')
  71.             if feed_link and feed_link.has_key('href'):
  72.                 url   = self.ROOT + feed_link['href']
  73.                 if not self.is_in_list(linklist,url):
  74.                     title = title_prefix + self.tag_to_string(feed_link)
  75.                     date  = strftime(self.timefmt)
  76.                     articles.append({
  77.                                       'title'      :title
  78.                                      ,'date'       :date
  79.                                      ,'url'        :url
  80.                                      ,'description':description
  81.                                     })
  82.                     linklist.append(url)
  83.         return [(soup.head.title.string, articles)]
  84.  
  85.     
  86.     keep_only_tags = [dict(name='div', attrs={'id':'printwrapper'})]
  87.     remove_tags = [dict(name=['object','link','meta','base','iframe','link','table'])]
  88.  
  89.     def print_version(self, url):
  90.         return url.replace('/bw/','/bw/storyContent/')
  91.  
  92.     def get_cover_url(self):
  93.         cover_url = None
  94.         soup = self.index_to_soup(self.INDEX)
  95.         cover_item = soup.find('img',attrs={'class':'toughbor'})
  96.         if cover_item:
  97.            cover_url = self.ROOT + cover_item['src']
  98.         return cover_url
  99.