home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / nikkei_sub.recipe < prev    next >
Text File  |  2011-09-09  |  7KB  |  126 lines

  1. import re
  2. from calibre.web.feeds.recipes import BasicNewsRecipe
  3. import mechanize
  4. from calibre.ptempfile import PersistentTemporaryFile
  5.  
  6.  
  7. class NikkeiNet_subscription(BasicNewsRecipe):
  8.     title           = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(MAX)'
  9.     __author__      = 'Hiroshi Miura'
  10.     description     = 'News and current market affairs from Japan, gather MAX articles'
  11.     needs_subscription = True
  12.     oldest_article  = 2
  13.     max_articles_per_feed = 10
  14.     language        = 'ja'
  15.     remove_javascript = False
  16.     temp_files = []
  17.  
  18.     remove_tags_before = {'class':"cmn-section cmn-indent"}
  19.     remove_tags = [
  20.                        {'class':"JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"},
  21.                        {'class':"cmn-article_keyword cmn-clearfix"},
  22.                        {'class':"cmn-print_headline cmn-clearfix"},
  23.                          ]
  24.     remove_tags_after = {'class':"cmn-pr_list"}
  25.  
  26.  
  27.     def get_browser(self):
  28.         br = BasicNewsRecipe.get_browser()
  29.  
  30.         cj = mechanize.LWPCookieJar()
  31.         br.set_cookiejar(cj)
  32.  
  33.         #br.set_debug_http(True)
  34.         #br.set_debug_redirects(True)
  35.         #br.set_debug_responses(True)
  36.  
  37.         if self.username is not None and self.password is not None:
  38.             #print "----------------------------get login form--------------------------------------------"
  39.             # open login form
  40.             br.open('https://id.nikkei.com/lounge/nl/base/LA0010.seam')
  41.             response = br.response()
  42.             #print "----------------------------get login form---------------------------------------------"
  43.             #print "----------------------------set login form---------------------------------------------"
  44.             # remove disabled input which brings error on mechanize
  45.             response.set_data(response.get_data().replace("<input id=\"j_id48\"", "<!-- "))
  46.             response.set_data(response.get_data().replace("gm_home_on.gif\" />", " -->"))
  47.             br.set_response(response)
  48.             br.select_form(name='LA0010Form01')
  49.             br['LA0010Form01:LA0010Email']   = self.username
  50.             br['LA0010Form01:LA0010Password'] = self.password
  51.             br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True
  52.             br.submit()
  53.             br.response()
  54.             #print "----------------------------send login form---------------------------------------------"
  55.             #print "----------------------------open news main page-----------------------------------------"
  56.             # open news site
  57.             br.open('http://www.nikkei.com/')
  58.             br.response()
  59.             #print "----------------------------www.nikkei.com BODY   --------------------------------------"
  60.             #print response2.get_data()
  61.             #print "-------------------------^^-got auto redirect form----^^--------------------------------"
  62.             # forced redirect in default
  63.             br.select_form(nr=0)
  64.             br.submit()
  65.             response3 = br.response()
  66.             # return some cookie which should be set by Javascript
  67.             #print response3.geturl()
  68.             raw = response3.get_data()
  69.             #print "---------------------------response to form --------------------------------------------"
  70.             # grab cookie from JS and set it
  71.             redirectflag = re.search(r"var checkValue = '(\d+)';", raw, re.M).group(1)
  72.             br.select_form(nr=0)
  73.  
  74.             self.temp_files.append(PersistentTemporaryFile('_fa.html'))
  75.             self.temp_files[-1].write("#LWP-Cookies-2.0\n")
  76.  
  77.             self.temp_files[-1].write("Set-Cookie3: Cookie-dummy=Cookie-value; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
  78.             self.temp_files[-1].write("Set-Cookie3: redirectFlag="+redirectflag+"; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
  79.             self.temp_files[-1].close()
  80.             cj.load(self.temp_files[-1].name)
  81.  
  82.             br.submit()
  83.  
  84.             #br.set_debug_http(False)
  85.             #br.set_debug_redirects(False)
  86.             #br.set_debug_responses(False)
  87.         return br
  88.  
  89.  
  90.  
  91.     feeds = [ (u'\u65e5\u7d4c\u4f01\u696d',     u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sangyo'),
  92.          (u'\u65e5\u7d4c\u88fd\u54c1',    u'http://www.zou3.net/php/rss/nikkei2rss.php?head=newpro'),
  93.          (u'internet',        u'http://www.zou3.net/php/rss/nikkei2rss.php?head=internet'),
  94.          (u'\u653f\u6cbb',         u'http://www.zou3.net/php/rss/nikkei2rss.php?head=seiji'),
  95.          (u'\u8ca1\u52d9',         u'http://www.zou3.net/php/rss/nikkei2rss.php?head=zaimu'),
  96.          (u'\u7d4c\u6e08',         u'http://www.zou3.net/php/rss/nikkei2rss.php?head=keizai'),
  97.          (u'\u56fd\u969b',         u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kaigai'),
  98.          (u'\u79d1\u5b66',         u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kagaku'),
  99.          (u'\u30de\u30fc\u30b1\u30c3\u30c8', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=market'),
  100.          (u'\u304f\u3089\u3057',     u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kurashi'),
  101.          (u'\u30b9\u30dd\u30fc\u30c4',     u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sports'),
  102.          (u'\u793e\u4f1a',         u'http://www.zou3.net/php/rss/nikkei2rss.php?head=shakai'),
  103.          (u'\u30a8\u30b3',         u'http://www.zou3.net/php/rss/nikkei2rss.php?head=eco'),
  104.          (u'\u5065\u5eb7',         u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kenkou'),
  105.          (u'\u96c7\u7528',         u'http://www.zou3.net/php/rss/nikkei2rss.php?head=koyou'),
  106.          (u'\u6559\u80b2',         u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kyouiku'),
  107.          (u'\u304a\u304f\u3084\u307f',     u'http://www.zou3.net/php/rss/nikkei2rss.php?head=okuyami'),
  108.          (u'\u4eba\u4e8b',         u'http://www.zou3.net/php/rss/nikkei2rss.php?head=zinzi'),
  109.          (u'\u7279\u96c6',         u'http://www.zou3.net/php/rss/nikkei2rss.php?head=special'),
  110.          (u'\u5730\u57df\u30cb\u30e5\u30fc\u30b9', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=local'),
  111.          (u'\u7d71\u8a08\u30fb\u767d\u66f8', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=report'),
  112.          (u'\u30e9\u30f3\u30ad\u30f3\u30b0', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=ranking'),
  113.          (u'\u4f1a\u898b',         u'http://www.zou3.net/php/rss/nikkei2rss.php?head=interview'),
  114.          (u'\u793e\u8aac\u30fb\u6625\u79cb', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=shasetsu'),
  115.          (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30d7\u30ed\u91ce\u7403', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=baseball'),
  116.          (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u5927\u30ea\u30fc\u30b0', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=mlb'),
  117.          (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30b5\u30c3\u30ab\u30fc', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=soccer'),
  118.          (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30b4\u30eb\u30d5',     u'http://www.zou3.net/php/rss/nikkei2rss.php?head=golf'),
  119.          (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u76f8\u64b2',     u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sumou'),
  120.          (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u7af6\u99ac',     u'http://www.zou3.net/php/rss/nikkei2rss.php?head=keiba'),
  121.           (u'\u8abf\u67fb\u30fb\u30a2\u30f3\u30b1\u30fc\u30c8',     u'http://www.zou3.net/php/rss/nikkei2rss.php?head=research')
  122.         ]
  123.  
  124.  
  125.  
  126.