home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / nikkei_sub_economy.recipe < prev    next >
Text File  |  2011-09-09  |  5KB  |  111 lines

  1. __license__   = 'GPL v3'
  2. __copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
  3. '''
  4. www.nikkei.com
  5. '''
  6.  
  7. import re
  8. from calibre.web.feeds.recipes import BasicNewsRecipe
  9. import mechanize
  10. from calibre.ptempfile import PersistentTemporaryFile
  11.  
  12. class NikkeiNet_sub_economy(BasicNewsRecipe):
  13.     title           = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(\u7d4c\u6e08)'
  14.     __author__      = 'Hiroshi Miura'
  15.     description     = 'News and current market affairs from Japan'
  16.     cover_url       = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
  17.     masthead_url    = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
  18.     needs_subscription = True
  19.     oldest_article  = 2
  20.     max_articles_per_feed = 20
  21.     language        = 'ja'
  22.     remove_javascript = False
  23.     temp_files = []
  24.  
  25.     remove_tags_before = {'class':"cmn-section cmn-indent"}
  26.     remove_tags = [
  27.                        {'class':"JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"},
  28.                        {'class':"cmn-article_keyword cmn-clearfix"},
  29.                        {'class':"cmn-print_headline cmn-clearfix"},
  30.                        {'class':"cmn-article_list"},
  31.                        dict(id="ABOUT-NIKKEI"),
  32.                        {'class':"cmn-sub_market"},
  33.                          ]
  34.     remove_tags_after = {'class':"cmn-pr_list"}
  35.  
  36.     feeds = [  (u'\u653f\u6cbb',         u'http://www.zou3.net/php/rss/nikkei2rss.php?head=seiji'),
  37.          (u'\u8ca1\u52d9',         u'http://www.zou3.net/php/rss/nikkei2rss.php?head=zaimu'),
  38.          (u'\u7d4c\u6e08',         u'http://www.zou3.net/php/rss/nikkei2rss.php?head=keizai'),
  39.          (u'\u30de\u30fc\u30b1\u30c3\u30c8', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=market'),
  40.          (u'\u96c7\u7528',         u'http://www.zou3.net/php/rss/nikkei2rss.php?head=koyou'),
  41.          (u'\u6559\u80b2',         u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kyouiku'),
  42.          (u'\u304a\u304f\u3084\u307f',     u'http://www.zou3.net/php/rss/nikkei2rss.php?head=okuyami'),
  43.          (u'\u4eba\u4e8b',         u'http://www.zou3.net/php/rss/nikkei2rss.php?head=zinzi'),
  44.         ]
  45.  
  46.     def get_browser(self):
  47.         br = BasicNewsRecipe.get_browser()
  48.  
  49.         cj = mechanize.LWPCookieJar()
  50.         br.set_cookiejar(cj)
  51.  
  52.         #br.set_debug_http(True)
  53.         #br.set_debug_redirects(True)
  54.         #br.set_debug_responses(True)
  55.  
  56.         if self.username is not None and self.password is not None:
  57.             #print "----------------------------get login form--------------------------------------------"
  58.             # open login form
  59.             br.open('https://id.nikkei.com/lounge/nl/base/LA0010.seam')
  60.             response = br.response()
  61.             #print "----------------------------get login form---------------------------------------------"
  62.             #print "----------------------------set login form---------------------------------------------"
  63.             # remove disabled input which brings error on mechanize
  64.             response.set_data(response.get_data().replace("<input id=\"j_id48\"", "<!-- "))
  65.             response.set_data(response.get_data().replace("gm_home_on.gif\" />", " -->"))
  66.             br.set_response(response)
  67.             br.select_form(name='LA0010Form01')
  68.             br['LA0010Form01:LA0010Email']   = self.username
  69.             br['LA0010Form01:LA0010Password'] = self.password
  70.             br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True
  71.             br.submit()
  72.             br.response()
  73.             #print "----------------------------send login form---------------------------------------------"
  74.             #print "----------------------------open news main page-----------------------------------------"
  75.             # open news site
  76.             br.open('http://www.nikkei.com/')
  77.             br.response()
  78.             #print "----------------------------www.nikkei.com BODY   --------------------------------------"
  79.             #print response2.get_data()
  80.             #print "-------------------------^^-got auto redirect form----^^--------------------------------"
  81.             # forced redirect in default
  82.             br.select_form(nr=0)
  83.             br.submit()
  84.             response3 = br.response()
  85.             # return some cookie which should be set by Javascript
  86.             #print response3.geturl()
  87.             raw = response3.get_data()
  88.             #print "---------------------------response to form --------------------------------------------"
  89.             # grab cookie from JS and set it
  90.             redirectflag = re.search(r"var checkValue = '(\d+)';", raw, re.M).group(1)
  91.             br.select_form(nr=0)
  92.  
  93.             self.temp_files.append(PersistentTemporaryFile('_fa.html'))
  94.             self.temp_files[-1].write("#LWP-Cookies-2.0\n")
  95.  
  96.             self.temp_files[-1].write("Set-Cookie3: Cookie-dummy=Cookie-value; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
  97.             self.temp_files[-1].write("Set-Cookie3: redirectFlag="+redirectflag+"; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
  98.             self.temp_files[-1].close()
  99.             cj.load(self.temp_files[-1].name)
  100.  
  101.             br.submit()
  102.  
  103.             #br.set_debug_http(False)
  104.             #br.set_debug_redirects(False)
  105.             #br.set_debug_responses(False)
  106.         return br
  107.  
  108.  
  109.  
  110.  
  111.