home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / politika.recipe < prev    next >
Text File  |  2011-09-09  |  3KB  |  67 lines

  1. __license__   = 'GPL v3'
  2. __copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
  3. '''
  4. politika.rs
  5. '''
  6. import re
  7. from calibre.web.feeds.news import BasicNewsRecipe
  8.  
  9. class Politika(BasicNewsRecipe):
  10.     title                 = 'Politika Online'
  11.     __author__            = 'Darko Miletic'
  12.     description           = 'Najstariji dnevni list na Balkanu'
  13.     publisher             = 'Politika novine i Magazini d.o.o'
  14.     category              = 'news, politics, Serbia'
  15.     oldest_article        = 2
  16.     max_articles_per_feed = 100
  17.     no_stylesheets        = True
  18.     use_embedded_content  = False
  19.     encoding              = 'utf8'
  20.     delay                 = 1
  21.     language              = 'sr'
  22.     publication_type      = 'newspaper'    
  23.     masthead_url          = 'http://static.politika.co.rs/images_new/politika.gif'    
  24.     extra_css             = """ 
  25.                                @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} 
  26.                                @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} 
  27.                                body{font-family: Arial,Helvetica,sans1,sans-serif}
  28.                                h1{font-family: "Times New Roman",Times,serif1,serif}
  29.                                .articledescription{font-family: sans1, sans-serif}
  30.                             """
  31.  
  32.     conversion_options = {
  33.                           'comment'   : description
  34.                         , 'tags'      : category
  35.                         , 'publisher' : publisher
  36.                         , 'language'  : language
  37.                         }
  38.  
  39.  
  40.     preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
  41.  
  42.     keep_only_tags     = [dict(name='div', attrs={'class':'big_article_home item_details'})]
  43.     remove_tags_after  = dict(attrs={'class':'online_date'})
  44.     remove_tags        = [dict(name=['link','meta','iframe','embed','object'])]
  45.     
  46.     feeds          = [
  47.                          (u'Politika'         , u'http://www.politika.rs/rubrike/Politika/index.1.lt.xml'         )
  48.                         ,(u'Svet'             , u'http://www.politika.rs/rubrike/Svet/index.1.lt.xml'             )
  49.                         ,(u'Ostali komentari' , u'http://www.politika.rs/rubrike/ostali-komentari/index.1.lt.xml' )
  50.                         ,(u'Pogledi'          , u'http://www.politika.rs/pogledi/index.lt.xml'                    )
  51.                         ,(u'Pogledi sa strane', u'http://www.politika.rs/rubrike/Pogledi-sa-strane/index.1.lt.xml')
  52.                         ,(u'Tema dana'        , u'http://www.politika.rs/rubrike/tema-dana/index.1.lt.xml'        )
  53.                         ,(u'Kultura'          , u'http://www.politika.rs/rubrike/Kultura/index.1.lt.xml'          )
  54.                         ,(u'Spektar'          , u'http://www.politika.rs/rubrike/zivot-i-stil/index.1.lt.xml'     )
  55.                      ]
  56.  
  57.     def preprocess_html(self, soup):
  58.         for item in soup.findAll(style=True):
  59.             del item['style']
  60.         for item in soup.findAll('a', attrs={'class':'category'}):
  61.             item.name='span'
  62.             if item.has_key('href'):            
  63.                del item['href']
  64.             if item.has_key('title'):            
  65.                del item['title']
  66.         return soup
  67.