home *** CD-ROM | disk | FTP | other *** search
- __license__ = 'GPL v3'
- __copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
- '''
- www.nikkei.com
- '''
-
- import re
- from calibre.web.feeds.recipes import BasicNewsRecipe
- import mechanize
- from calibre.ptempfile import PersistentTemporaryFile
-
-
class NikkeiNet_sub_shakai(BasicNewsRecipe):
    """Calibre recipe for the "Social" feed of the Nikkei electronic edition.

    Articles are listed through a third-party RSS bridge (see ``feeds``) and
    fetched from www.nikkei.com.  The recipe declares that a subscription is
    needed; ``get_browser`` performs the login when credentials are set.
    """

    title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(Social)'
    __author__ = 'Hiroshi Miura'
    description = 'News and current market affairs from Japan'
    cover_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
    masthead_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
    needs_subscription = True
    oldest_article = 2
    max_articles_per_feed = 20
    language = 'ja'
    remove_javascript = False
    # Class-level list; get_browser() appends the temporary cookie file to it.
    # NOTE(review): presumably kept so the file outlives the method call and
    # can be cleaned up later -- confirm against the calibre base class.
    temp_files = []

    # Keep only the article body: drop everything before the section block,
    # the listed form/keyword/print-headline elements, and everything after
    # the "cmn-pr_list" element.
    remove_tags_before = {'class': "cmn-section cmn-indent"}
    remove_tags = [
        {'class': "JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"},
        {'class': "cmn-article_keyword cmn-clearfix"},
        {'class': "cmn-print_headline cmn-clearfix"},
    ]
    remove_tags_after = {'class': "cmn-pr_list"}

    feeds = [
        (u'\u793e\u4f1a', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=shakai')
    ]

    def get_browser(self):
        """Return a browser, logged in to nikkei.com when credentials are set.

        Without a username and password the browser is returned with only an
        empty LWP cookie jar attached.  Otherwise the method:

        1. opens the login form and submits the credentials,
        2. opens the news top page and submits its first form (the forced
           redirect form),
        3. reads the ``checkValue`` number out of the returned JavaScript and
           stores it as the ``redirectFlag`` cookie, which the site would
           normally set from JavaScript,
        4. submits the first form of that page.

        Raises:
            ValueError: if the ``checkValue`` assignment cannot be found in
                the page returned after the forced redirect.
        """
        # Pass ``self`` explicitly, as documented for calibre recipes.
        br = BasicNewsRecipe.get_browser(self)

        cj = mechanize.LWPCookieJar()
        br.set_cookiejar(cj)

        if self.username is None or self.password is None:
            return br

        # Open the login form.
        br.open('https://id.nikkei.com/lounge/nl/base/LA0010.seam')
        response = br.response()

        # Comment out the disabled input that makes mechanize fail on this
        # form.  Bytes literals are used because the response body is raw
        # data (bytes on Python 3, plain ``str`` on Python 2).
        page = response.get_data()
        page = page.replace(b"<input id=\"j_id48\"", b"<!-- ")
        page = page.replace(b"gm_home_on.gif\" />", b" -->")
        response.set_data(page)
        br.set_response(response)

        # Fill in and send the login form, asking for auto-login.
        br.select_form(name='LA0010Form01')
        br['LA0010Form01:LA0010Email'] = self.username
        br['LA0010Form01:LA0010Password'] = self.password
        br.form.find_control(
            id='LA0010Form01:LA0010AutoLoginOn', type="checkbox"
        ).get(nr=0).selected = True
        br.submit()

        # Open the news site; by default it answers with a forced redirect
        # form, which is submitted as-is.
        br.open('http://www.nikkei.com/')
        br.select_form(nr=0)
        br.submit()
        raw = br.response().get_data()

        # The page sets a cookie from JavaScript; grab its value by hand.
        found = re.search(br"var checkValue = '(\d+)';", raw, re.M)
        if found is None:
            raise ValueError(
                "nikkei.com login: 'checkValue' not found in redirect page")
        redirectflag = found.group(1)
        br.select_form(nr=0)

        # Write the cookies in LWP cookie-file format and load them into the
        # jar, so the following submit carries the redirectFlag cookie.
        cookie_file = PersistentTemporaryFile('_fa.html')
        self.temp_files.append(cookie_file)
        try:
            cookie_file.write(b"#LWP-Cookies-2.0\n")
            cookie_file.write(
                b"Set-Cookie3: Cookie-dummy=Cookie-value; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
            cookie_file.write(
                b"Set-Cookie3: redirectFlag=" + redirectflag +
                b"; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
        finally:
            # Close even when a write fails so the handle is not leaked.
            cookie_file.close()
        cj.load(cookie_file.name)

        br.submit()
        return br
-
-
-
-
-