home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / lavanguardia.recipe < prev    next >
Text File  |  2011-09-09  |  3KB  |  70 lines

  1. #!/usr/bin/env  python
  2. # -*- coding: utf-8 -*-
  3.  
  4. __license__   = 'GPL v3'
  5. __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
  6. '''
  7. www.lavanguardia.es
  8. '''
  9.  
  10. from calibre.web.feeds.news import BasicNewsRecipe
  11. from calibre.ebooks.BeautifulSoup import Tag
  12.  
  13. class LaVanguardia(BasicNewsRecipe):
  14.     title                 = 'La Vanguardia Digital'
  15.     __author__            = 'Darko Miletic'
  16.     description           = u'Noticias desde Espa├▒a'
  17.     publisher             = 'La Vanguardia'
  18.     category              = 'news, politics, Spain'
  19.     oldest_article        = 2
  20.     max_articles_per_feed = 100
  21.     no_stylesheets        = True
  22.     use_embedded_content  = False
  23.     delay                 = 5
  24.  #   encoding              = 'cp1252'
  25.     language = 'es'
  26.  
  27.     direction             = 'ltr'
  28.  
  29.     html2lrf_options = [
  30.                           '--comment'  , description
  31.                         , '--category' , category
  32.                         , '--publisher', publisher
  33.                         ]
  34.  
  35.     html2epub_options  = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
  36.  
  37.     feeds              = [
  38.                             (u'Portada'           , u'http://feeds.feedburner.com/lavanguardia/home'   )
  39.                           ,(u'Cultura'              , u'http://feeds.feedburner.com/lavanguardia/cultura'      )
  40.                            ,(u'Deportes'             , u'http://feeds.feedburner.com/lavanguardia/deportes'     )
  41.                            ,(u'Economia'             , u'http://feeds.feedburner.com/lavanguardia/economia'     )
  42.                            ,(u'El lector opina'      , u'http://feeds.feedburner.com/lavanguardia/lectoropina'  )
  43.                            ,(u'Gente y TV'           , u'http://feeds.feedburner.com/lavanguardia/gente'        )
  44.                            ,(u'Internacional'        , u'http://feeds.feedburner.com/lavanguardia/internacional')
  45.                            ,(u'Internet y tecnologia', u'http://feeds.feedburner.com/lavanguardia/internet'     )
  46.                            ,(u'Motor'                , u'http://feeds.feedburner.com/lavanguardia/motor'        )
  47.                            ,(u'Politica'             , u'http://feeds.feedburner.com/lavanguardia/politica'     )
  48.                            ,(u'Sucesos'             , u'http://feeds.feedburner.com/lavanguardia/sucesos'      )
  49.                          ]
  50.  
  51.  
  52.     keep_only_tags = [
  53.                        dict(name='div', attrs={'class':'detalle  noticia'})
  54.                     ]
  55.  
  56.     remove_tags        = [
  57.                              dict(name=['object','link','script'])
  58.                             ,dict(name='div', attrs={'class':['colC','peu','jstoolbar']})
  59.                          ]
  60.  
  61.     remove_tags_after = [dict(name='div', attrs={'class':'text'})]
  62.  
  63.     def preprocess_html(self, soup):
  64.         soup.html['dir' ] = self.direction
  65.         mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")])
  66.         soup.head.insert(0,mcharset)
  67.         for item in soup.findAll(style=True):
  68.             del item['style']
  69.         return soup
  70.