home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / yomiuri_world.recipe < prev    next >
Text File  |  2011-09-09  |  2KB  |  62 lines

  1. __license__   = 'GPL v3'
  2. __copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
  3. '''
  4. www.yomiuri.co.jp
  5. '''
  6.  
  7. from calibre.web.feeds.news import BasicNewsRecipe
  8. import re
  9.  
  10. class YOLNews(BasicNewsRecipe):
  11.     title          = u'Yomiuri Online (World)'
  12.     __author__     = 'Hiroshi Miura'
  13.     oldest_article = 2
  14.     max_articles_per_feed = 50
  15.     description    = 'Japanese traditional newspaper Yomiuri Online News/world news'
  16.     publisher      = 'Yomiuri Online News'
  17.     category       = 'news, japan'
  18.     language       = 'ja'
  19.     encoding       = 'Shift_JIS'
  20.     index          = 'http://www.yomiuri.co.jp/world/'
  21.     remove_javascript = True
  22.     masthead_title = u"YOMIURI ONLINE"
  23.  
  24.     keep_only_tags = [{'class':"article-def"}]
  25.     remove_tags = [{'class':"RelatedArticle"},
  26.                    {'class':"sbtns"}
  27.                     ]
  28.     remove_tags_after = {'class':"date-def"}
  29.  
  30.     def parse_feeds(self):
  31.         feeds = BasicNewsRecipe.parse_feeds(self)
  32.         for curfeed in feeds:
  33.             delList = []
  34.             for a,curarticle in enumerate(curfeed.articles):
  35.                 if re.search(r'rssad.jp', curarticle.url):
  36.                     delList.append(curarticle)
  37.             if len(delList)>0:
  38.                 for d in delList:
  39.                     index = curfeed.articles.index(d)
  40.                     curfeed.articles[index:index+1] = []
  41.         return feeds
  42.  
  43.     def parse_index(self):
  44.         feeds = []
  45.         soup   = self.index_to_soup(self.index)
  46.         topstories = soup.find('ul',attrs={'class':'list-def'})
  47.         if topstories:
  48.            newsarticles = []
  49.            for itt in topstories.findAll('li'):
  50.                 itema = itt.find('a',href=True)
  51.                 if itema:
  52.                     itd1 = itema.findNextSibling(text = True)
  53.                     newsarticles.append({
  54.                                       'title'      :itema.string
  55.                                      ,'date'       :''.join([itd1])
  56.                                      ,'url'        :'http://www.yomiuri.co.jp' + itema['href']
  57.                                      ,'description':''
  58.                                     })
  59.            feeds.append(('World', newsarticles))
  60.         return feeds
  61.  
  62.