home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / la_tribuna.recipe < prev    next >
Text File  |  2011-09-09  |  3KB  |  75 lines

  1. # -*- coding: utf-8 -*-
  2. __license__   = 'GPL v3'
  3. __author__    = 'Luis Hernandez'
  4. __copyright__ = 'Luis Hernandez<tolyluis@gmail.com>'
  5. __version__     = 'v1.0'
  6. __date__        = '01 Feb 2011'
  7.  
  8. '''
  9. http://www.promecal.es/
  10. '''
  11. from calibre.web.feeds.news import BasicNewsRecipe
  12.  
  13. class AdvancedUserRecipe1294946868(BasicNewsRecipe):
  14.  
  15.     title             = u'La Tribuna de'
  16.     publisher      = u'Grupo PROMECAL'
  17.  
  18.     __author__  = 'Luis Hern├índez'
  19.     description   = 'Varios diarios locales del grupo PROMECAL'
  20.  
  21.     oldest_article = 3
  22.     max_articles_per_feed = 50
  23.  
  24.     remove_javascript = True
  25.     no_stylesheets        = True
  26.     use_embedded_content  = False
  27.  
  28.     encoding              = 'utf-8'
  29.     language              = 'es'
  30.     timefmt        = '[%a, %d %b, %Y]'
  31.  
  32.     keep_only_tags     = [
  33.                                    dict(name='div', attrs={'id':['articulo']})
  34.                                   ,dict(name='div', attrs={'class':['foto']})
  35.                                   ,dict(name='p', attrs={'id':['texto']})
  36.                                 ]
  37.  
  38.     remove_tags_before = dict(name='div' , attrs={'class':['comparte']})
  39.     remove_tags_after  = dict(name='div' , attrs={'id':['relacionadas']})
  40.  
  41.     remove_tags  = [
  42.                               dict(name='div', attrs={'id':['relacionadas']})
  43.                              ,dict(name='h3')
  44.                              ,dict(name='h5')
  45.                           ]
  46.  
  47.     extra_css             = """
  48.                                p{text-align: justify; font-size: 100%}
  49.                                body{text-align: left; font-family: serif; font-size: 100%}
  50.                                h1{font-family: sans; font-size:150%; font-weight: bold; text-align: justify;}
  51.                                h2{font-family: sans-serif; font-size:85%; font-style: italic; text-align: justify;}
  52.                                h4{font-family: sans; font-size:75%; font-weight: bold; text-align: center;}
  53.                                img{margin-bottom: 0.4em}
  54.                                 """
  55.  
  56.     def preprocess_html(self, soup):
  57.         for alink in soup.findAll('a'):
  58.             if alink.string is not None:
  59.                tstr = alink.string
  60.                alink.replaceWith(tstr)
  61.         return soup
  62.  
  63.  
  64.     feeds          = [
  65.            (u'Albacete', u'http://www.latribunadealbacete.es/rss.html')
  66.           ,(u'Avila', u'http://www.diariodeavila.es/rss.html')
  67.           ,(u'Burgos', u'http://www.diariodeburgos.es/rss.html')
  68.           ,(u'Ciudad Real', u'http://www.latribunadeciudadreal.es/rss.html')
  69.           ,(u'Palencia', u'http://www.diariopalentino.es/rss.html')
  70.           ,(u'Puertollano', u'http://www.latribunadepuertollano.es/rss.html')
  71.           ,(u'Talavera de la Reina', u'http://www.latribunadetalavera.es/rss.html')
  72.           ,(u'Toledo', u'http://www.latribunadetoledo.es/rss.html')
  73.           ,(u'Valladolid', u'http://www.eldiadevalladolid.com/rss.html')
  74.                        ]
  75.