home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / novistandard.recipe < prev    next >
Text File  |  2011-09-09  |  5KB  |  101 lines

  1.  
  2. __license__   = 'GPL v3'
  3. __copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
  4. '''
  5. www.standard.rs
  6. '''
  7.  
  8. import re
  9. from calibre.web.feeds.news import BasicNewsRecipe
  10.  
  11. class NoviStandard(BasicNewsRecipe):
  12.     title                  = 'Novi Standard'
  13.     __author__             = 'Darko Miletic'
  14.     description            = 'NoviStandard - energija je neunistiva!'
  15.     publisher              = 'Novi Standard'
  16.     category               = 'news, politics, Serbia'
  17.     no_stylesheets         = True
  18.     delay                  = 1
  19.     oldest_article         = 15
  20.     encoding               = 'utf-8'
  21.     publication_type       = 'magazine'
  22.     needs_subscription     = 'optional'
  23.     remove_empty_feeds     = True
  24.     INDEX                  = 'http://www.standard.rs/'
  25.     use_embedded_content   = False
  26.     language               = 'sr'
  27.     publication_type       = 'magazine'
  28.     masthead_url           = 'http://www.standard.rs/templates/ja_opal/images/red/logo.png'
  29.     extra_css              = """
  30.                                  @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
  31.                                  @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
  32.                                  body{font-family: Arial,"Segoe UI","Trebuchet MS",Helvetica,sans1,sans-serif}
  33.                                  .dropcap{font-family: Georgia,Times,serif1,serif; display:inline}
  34.                                  .dropcap:first-letter{display: inline; font-size: xx-large; font-weight: bold}
  35.                                  .contentheading{color: gray; font-size: x-large}
  36.                                  .article-meta, .createdby{color: red}
  37.                                  img{margin-top:0.5em; margin-bottom: 0.7em; display: block}
  38.                              """
  39.  
  40.     conversion_options = {
  41.                           'comment'   : description
  42.                         , 'tags'      : category
  43.                         , 'publisher' : publisher
  44.                         , 'language'  : language
  45.                         }
  46.  
  47.     preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
  48.  
  49.     def get_browser(self):
  50.         br = BasicNewsRecipe.get_browser()
  51.         br.open(self.INDEX)
  52.         if self.username is not None and self.password is not None:
  53.             br.select_form(name='login')
  54.             br['username'] = self.username
  55.             br['passwd'  ] = self.password
  56.             br.submit()
  57.         return br
  58.  
  59.     keep_only_tags    =[dict(attrs={'class':['contentheading','article-meta','article-content']})]
  60.     remove_tags_after =dict(attrs={'class':'extravote-container'})
  61.     remove_tags       = [
  62.                            dict(name=['object','link','iframe','meta','base'])
  63.                           ,dict(attrs={'class':'extravote-container'})
  64.                         ]
  65.     remove_attributes =['border','background','height','width','align','valign','lang']
  66.     feeds             = [
  67.                             (u'Naslovna', u'http://www.standard.rs/index.php?format=feed&type=rss')
  68.                            ,(u'Politika', u'http://www.standard.rs/vesti/36-politika.html?format=feed&type=rss')
  69.                            ,(u'Cvijanovic preporucuje', u'http://www.standard.rs/-cvijanovi-vam-preporuuje.html?format=feed&type=rss')
  70.                            ,(u'Kolumne', u'http://www.standard.rs/vesti/49-kolumne.html?format=feed&type=rss')
  71.                            ,(u'Kultura', u'http://www.standard.rs/vesti/40-kultura.html?format=feed&type=rss')
  72.                            ,(u'Lifestyle', u'http://www.standard.rs/vesti/39-lifestyle.html?format=feed&type=rss')
  73.                            ,(u'Svet', u'http://www.standard.rs/vesti/41-svet.html?format=feed&type=rss')
  74.                            ,(u'Ekonomija', u'http://www.standard.rs/vesti/37-ekonomija.html?format=feed&type=rss')
  75.                            ,(u'Sport', u'http://www.standard.rs/vesti/38-sport.html?format=feed&type=rss')
  76.                         ]
  77.  
  78.  
  79.     def preprocess_html(self, soup):
  80.         for item in soup.findAll(style=True):
  81.             del item['style']
  82.         for item in soup.findAll('div'):
  83.             if len(item.contents) == 0:
  84.                item.extract()
  85.         for item in soup.findAll('a'):
  86.             limg = item.find('img')
  87.             if item.string is not None:
  88.                str = item.string
  89.                item.replaceWith(str)
  90.             else:
  91.                if limg:
  92.                   item.name = 'div'
  93.                   item.attrs = []
  94.                else:
  95.                    str = self.tag_to_string(item)
  96.                    item.replaceWith(str)
  97.         for item in soup.findAll('img'):
  98.             if not item.has_key('alt'):
  99.                item['alt'] = 'image'
  100.         return soup
  101.