home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / toyokeizai.recipe < prev    next >
Text File  |  2011-09-09  |  3KB  |  69 lines

  1. __license__   = 'GPL v3'
  2. __copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
  3. '''
  4. www.toyokeizai.net
  5. '''
  6.  
  7. from calibre.web.feeds.news import BasicNewsRecipe
  8. import re
  9.  
  10. class Toyokeizai(BasicNewsRecipe):
  11.     title          = u'ToyoKeizai News'
  12.     __author__     = 'Hiroshi Miura'
  13.     oldest_article = 1
  14.     max_articles_per_feed = 50
  15.     description    = 'Japanese traditional economy and business magazine, only for advanced subscribers supported'
  16.     publisher      = 'Toyokeizai Shinbun Sha'
  17.     category       = 'economy, magazine, japan'
  18.     language       = 'ja'
  19.     encoding       = 'euc-jp'
  20.     index          = 'http://member.toyokeizai.net/news/'
  21.     remove_javascript = True
  22.     no_stylesheets = True
  23.     masthead_title = u'TOYOKEIZAI'
  24.     needs_subscription = True
  25.     timefmt = '[%y/%m/%d]'
  26.     recursions = 5
  27.     match_regexps =[ r'page/\d+']
  28.  
  29.     keep_only_tags = [
  30.                       dict(name='div', attrs={'class':['news']}),
  31.                       dict(name='div', attrs={'class':["news_cont"]}),
  32.                       dict(name='div', attrs={'class':["news_con"]}),
  33. #                      dict(name='div', attrs={'class':["norightsMessage"]})
  34.                      ]
  35.     remove_tags = [{'class':"mt35 mgz"},
  36.                             {'class':"mt20 newzia"},
  37.                             {'class':"mt20 fontS"},
  38.                             {'class':"bk_btn_m"},
  39.                             dict(id='newzia_connect_member')
  40.                             ]
  41.  
  42.     def parse_index(self):
  43.         feeds = []
  44.         soup   = self.index_to_soup(self.index)
  45.         topstories = soup.find('ul',attrs={'class':'list6'})
  46.         if topstories:
  47.             newsarticles = []
  48.             for itt in topstories.findAll('li'):
  49.                 itema = itt.find('a',href=True)
  50.                 itemd = itt.find('span')
  51.                 newsarticles.append({
  52.                                       'title'      :itema.string
  53.                                      ,'date'       :re.compile(r"\- ").sub("",itemd.string)
  54.                                      ,'url'        :'http://member.toyokeizai.net' + itema['href']
  55.                                      ,'description':itema['title']
  56.                                     })
  57.             feeds.append(('news', newsarticles))
  58.         return feeds
  59.  
  60.     def get_browser(self):
  61.         br = BasicNewsRecipe.get_browser()
  62.         if self.username is not None and self.password is not None:
  63.             br.open('http://member.toyokeizai.net/norights/form/')
  64.             br.select_form(nr=0)
  65.             br['kaiin_id']   = self.username
  66.             br['password'] = self.password
  67.             br.submit()
  68.         return br
  69.