home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / o_globo.recipe < prev    next >
Encoding:
Text File  |  2011-09-09  |  3.4 KB  |  82 lines

  1. #!/usr/bin/env  python
  2.  
  3. __license__   = 'GPL v3'
  4. __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
  5. '''
  6. oglobo.globo.com
  7. '''
  8.  
  9. from calibre.web.feeds.news import BasicNewsRecipe
  10.  
  11. class OGlobo(BasicNewsRecipe):
  12.     title                 = 'O Globo'
  13.     __author__            = 'Darko Miletic and Sujata Raman'
  14.     description           = 'News from Brasil'
  15.     publisher             = 'O Globo'
  16.     category              = 'news, politics, Brasil'
  17.     oldest_article        = 2
  18.     max_articles_per_feed = 100
  19.     no_stylesheets        = True
  20.     use_embedded_content  = False
  21.     encoding              = 'cp1252'
  22.     cover_url             = 'http://oglobo.globo.com/_img/o-globo.png'
  23.     remove_javascript     = True
  24.  
  25.     html2lrf_options = [
  26.                           '--comment', description
  27.                         , '--category', category
  28.                         , '--publisher', publisher
  29.                         ]
  30.  
  31.     html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
  32.  
  33.     extra_css = '''
  34.                     cite{color:#007BB5; font-size:xx-small; font-style:italic;}
  35.                     body{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
  36.                     h3{font-size:large; color:#082963; font-weight:bold;}
  37.                     #ident{color:#0179B4; font-size:xx-small;}
  38.                     p{color:#000000;font-weight:normal;}                    
  39.                     .commentario p{color:#007BB5; font-style:italic;}
  40.                 '''
  41.  
  42.     
  43.     keep_only_tags = [dict(name='div', attrs={'id':'ltintb'}),
  44.                       dict(name='a', attrs={'class':['img imgLoader','img ftr imgLoader']}),]
  45.  
  46.     remove_tags = [
  47.                      dict(name='script')
  48.                     ,dict(name='object')
  49.                     ,dict(name='form')
  50.                     ,dict(name='div', attrs={'id':['linksPatGoogle','rdpm','cor','com','env','rcm_st','coment',]})
  51.                     ,dict(name='div', attrs={'class':'box-zap-anu2'})
  52.                     ,dict(name='a', attrs={'class':'assine'})
  53.                     ,dict(name='link')
  54.                   ]
  55.  
  56.  
  57.     feeds = [
  58.                (u'Todos os canais', u'http://oglobo.globo.com/rss/plantao.xml')
  59.               ,(u'Ciencia', u'http://oglobo.globo.com/rss/plantaociencia.xml')
  60.               ,(u'Educacao', u'http://oglobo.globo.com/rss/plantaoeducacao.xml')
  61.               ,(u'Opiniao', u'http://oglobo.globo.com/rss/plantaoopiniao.xml')
  62.               ,(u'Sao Paulo', u'http://oglobo.globo.com/rss/plantaosaopaulo.xml')
  63.               ,(u'Viagem', u'http://oglobo.globo.com/rss/plantaoviagem.xml')
  64.               ,(u'Cultura', u'http://oglobo.globo.com/rss/plantaocultura.xml')
  65.               ,(u'Esportes', u'http://oglobo.globo.com/rss/plantaoesportes.xml')
  66.               ,(u'Mundo', u'http://oglobo.globo.com/rss/plantaomundo.xml')
  67.               ,(u'Pais', u'http://oglobo.globo.com/rss/plantaopais.xml')
  68.               ,(u'Rio', u'http://oglobo.globo.com/rss/plantaorio.xml')
  69.               ,(u'Saude', u'http://oglobo.globo.com/rss/plantaosaude.xml')
  70.               ,(u'Viver Melhor', u'http://oglobo.globo.com/rss/plantaovivermelhor.xml')
  71.               ,(u'Economia', u'http://oglobo.globo.com/rss/plantaoeconomia.xml')
  72.               ,(u'Tecnologia', u'http://oglobo.globo.com/rss/plantaotecnologia.xml')
  73.             ]
  74.  
  75.     def preprocess_html(self, soup):
  76.         for item in soup.findAll(style=True):
  77.             del item['style']
  78.         return soup
  79.  
  80.     language = 'pt'
  81.  
  82.