home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / novosti.recipe < prev    next >
Text File  |  2011-09-09  |  5KB  |  92 lines

  1.  
  2. __license__   = 'GPL v3'
  3. __copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
  4. '''
  5. novosti.rs
  6. '''
  7.  
  8. import re
  9. from calibre.web.feeds.news import BasicNewsRecipe
  10.  
  11. class Novosti(BasicNewsRecipe):
  12.     title                 = 'Vecernje Novosti'
  13.     __author__            = 'Darko Miletic'
  14.     description           = 'U po─ìetku su bile istinske ve─ìernje novine - pokrenute u vreme Tr┼í─çanske krize, Italijansko-jugoslovenskog konflikta oko grada Trsta - ali su brzo izrasle u dnevni informativno-politi─ìki list, koji ve─ç godinama ima najve─çi tira┼╛ u Srbiji.'
  15.     publisher             = 'Kompanija Novosti'
  16.     category              = 'news, politics, Serbia'
  17.     oldest_article        = 2
  18.     max_articles_per_feed = 100
  19.     no_stylesheets        = True
  20.     use_embedded_content  = False
  21.     encoding              = 'utf-8'
  22.     language              = 'sr'
  23.     publication_type      = 'newspaper'
  24.     masthead_url          = 'http://www.novosti.rs/images/basic/logo-print.png'
  25.     extra_css             = """ @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
  26.                                 .article_description,body{font-family: Arial,Helvetica,sans1,sans-serif}
  27.                                 .author{font-size: small}
  28.                                 .articleLead{font-size: large; font-weight: bold}
  29.                                 img{display: block; margin-bottom: 1em; margin-top: 1em}
  30.                             """
  31.  
  32.     conversion_options = {
  33.                           'comment'      : description
  34.                         , 'tags'         : category
  35.                         , 'publisher'    : publisher
  36.                         , 'language'     : language
  37.                         , 'pretty_print' : True
  38.                         }
  39.  
  40.     preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
  41.  
  42.     keep_only_tags     = [dict(attrs={'class':['articleTitle','articleInfo','articleLead','singlePhoto fl','articleBody']})]
  43.     remove_tags        = [
  44.                             dict(name=['embed','object','iframe','base','link','meta'])
  45.                            ,dict(name='a', attrs={'class':'loadComments topCommentsLink'})
  46.                          ]
  47.     remove_attributes  = ['lang','xmlns:fb']
  48.     
  49.     feeds              = [
  50.                             (u'Politika'     , u'http://www.novosti.rs/rss/2-Sve%20vesti')
  51.                            ,(u'Drustvo'      , u'http://www.novosti.rs/rss/1-Sve%20vesti')
  52.                            ,(u'Ekonomija'    , u'http://www.novosti.rs/rss/3-Sve%20vesti')
  53.                            ,(u'Hronika'      , u'http://www.novosti.rs/rss/4-Sve%20vesti')
  54.                            ,(u'Dosije'       , u'http://www.novosti.rs/rss/5-Sve%20vesti')
  55.                            ,(u'Reportaze'    , u'http://www.novosti.rs/rss/6-Sve%20vesti')
  56.                            ,(u'Tehnologije'  , u'http://www.novosti.rs/rss/35-Sve%20vesti')
  57.                            ,(u'Zanimljivosti', u'http://www.novosti.rs/rss/26-Sve%20vesti')
  58.                            ,(u'Auto'         , u'http://www.novosti.rs/rss/50-Sve%20vesti')
  59.                            ,(u'Sport'        , u'http://www.novosti.rs/rss/11|47|12|14|13-Sve%20vesti')
  60.                            ,(u'Svet'         , u'http://www.novosti.rs/rss/7-Sve%20vesti')
  61.                            ,(u'Region'       , u'http://www.novosti.rs/rss/8-Sve%20vesti')
  62.                            ,(u'Dijaspora'    , u'http://www.novosti.rs/rss/9-Sve%20vesti')
  63.                            ,(u'Spektakl'     , u'http://www.novosti.rs/rss/10-Sve%20vesti')
  64.                            ,(u'Kultura'      , u'http://www.novosti.rs/rss/31-Sve%20vesti')
  65.                            ,(u'Srbija'       , u'http://www.novosti.rs/rss/15-Sve%20vesti')
  66.                            ,(u'Beograd'      , u'http://www.novosti.rs/rss/16-Sve%20vesti')
  67.                            ,(u'Zivot+'       , u'http://www.novosti.rs/rss/24|33|34|25|20|18|32|19-Sve%20vesti')
  68.                            ,(u'Turizam'      , u'http://www.novosti.rs/rss/36-Sve%20vesti')
  69.                          ]
  70.  
  71.     def preprocess_html(self, soup):
  72.         for item in soup.findAll(style=True):
  73.             del item['style']
  74.         for item in soup.findAll('a'):
  75.             limg = item.find('img')
  76.             if item.string is not None:
  77.                str = item.string
  78.                item.replaceWith(str)
  79.             else:
  80.                if limg:
  81.                   item.name = 'div'
  82.                   item.attrs = []
  83.                else:
  84.                    str = self.tag_to_string(item)
  85.                    item.replaceWith(str)
  86.         for item in soup.findAll('img'):
  87.             if not item.has_key('alt'):
  88.                item['alt'] = 'image'
  89.         return soup
  90.  
  91.  
  92.