home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / staradvertiser.recipe < prev    next >
Text File  |  2011-09-09  |  4KB  |  92 lines

  1. __license__   = 'GPL v3'
  2. __copyright__ = '2011, M. Ching modified from work 2009-2011 Darko Miletic <darko.miletic at gmail.com>'
  3. '''
  4. staradvertiser.com
  5. '''
  6.  
  7. from calibre.web.feeds.news import BasicNewsRecipe
  8.  
  9. class Starbulletin(BasicNewsRecipe):
  10.     title                 = 'Honolulu Star-Advertiser'
  11.     __author__            = 'Darko Miletic'
  12.     description           = 'Latest national and local Hawaii sports news'
  13.     publisher             = 'Honolulu Star-Advertiser'
  14.     category              = 'news, Honolulu, Hawaii'
  15.     oldest_article        = 2
  16.     needs_subscription    = True
  17.     max_articles_per_feed = 100
  18.     language              = 'en'
  19.     no_stylesheets        = True
  20.     use_embedded_content  = False
  21.     encoding              = 'utf8'
  22.     publication_type      = 'newspaper'
  23.     masthead_url          = 'http://media.staradvertiser.com/designimages/star-advertiser-logo-small.gif'
  24. #    extra_css             = """
  25. #                                body{font-family: Verdana,Arial,Helvetica,sans-serif}
  26. #                                h1,.brown,.hsa_postCredit{color: #663300}
  27. #                                .storyDeck{font-size: 1.2em; font-weight: bold}
  28. #                                img{display: block}
  29. #                            """
  30.  
  31.     conversion_options = {
  32.                           'comment'          : description
  33.                         , 'tags'             : category
  34.                         , 'publisher'        : publisher
  35.                         , 'language'         : language
  36.                         , 'linearize_tables' : True
  37.                         }
  38.     keep_only_tags = [
  39.                          dict(attrs={'id':'hsa_storyTitle'})
  40.             ,dict(attrs={'id':'hsa_storyTitle article-important'})
  41.                         ,dict(attrs={'class':['hsa_dateStamp','hsa_postCredit','storyDeck']})
  42.                         ,dict(name='span',attrs={'class':['hsa_dateStamp','hsa_postCredit']})
  43.             ,dict(name='span',attrs={'class':['hsa_dateStamp article-important','hsa_postCredit article-important']})
  44.                         ,dict(name='div',attrs={'class':'storytext article-important'})
  45.                         ,dict(name='div',attrs={'class':'storytext'})
  46.                      ]
  47.     remove_tags = [
  48.                      dict(name=['object','link','script','meta','base','iframe'])
  49. # removed 'span' from preceding list to permit keeping of author and timestamp
  50.                     ,dict(attrs={'class':['insideStoryImage','insideStoryAd']})
  51.                     ,dict(attrs={'name':'fb_share'})
  52.                   ]
  53.  
  54.     def get_browser(self):
  55.         br = BasicNewsRecipe.get_browser()
  56.         if self.username is not None and self.password is not None:
  57.             br.open('http://www.staradvertiser.com/manage/Login/')
  58.             br.select_form(name='loginForm')
  59.             br['email']   = self.username
  60.             br['password'] = self.password
  61.             br.submit()
  62.         return br
  63.  
  64.     feeds          = [
  65.                (u'Breaking News', u'http://www.staradvertiser.com/news/breaking/index.rss')
  66.               ,(u'News', u'http://www.staradvertiser.com/newspremium/index.rss')
  67.               ,(u'Business', u'http://www.staradvertiser.com/businesspremium/index.rss')
  68.               ,(u'Sports', u'http://www.staradvertiser.com/sportspremium/index.rss')
  69.               ,(u'Features', u'http://www.staradvertiser.com/featurespremium/index.rss')
  70.              ]
  71.  
  72.     def preprocess_html(self, soup):
  73.         for item in soup.findAll(style=True):
  74.             del item['style']
  75.         for item in soup.findAll('a'):
  76.             limg = item.find('img')
  77.             if item.string is not None:
  78.                str = item.string
  79.                item.replaceWith(str)
  80.             else:
  81.                if limg:
  82.                   item.name = 'div'
  83.                   item.attrs = []
  84.                else:
  85.                    str = self.tag_to_string(item)
  86.                    item.replaceWith(str)
  87.         for item in soup.findAll('img'):
  88.             if not item.has_key('alt'):
  89.                item['alt'] = 'image'
  90.         return soup
  91.  
  92.