home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / clarin.recipe < prev    next >
Text File  |  2011-09-09  |  3KB  |  76 lines

  1.  
  2. __license__   = 'GPL v3'
  3. __copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
  4. '''
  5. clarin.com
  6. '''
  7.  
  8. from calibre.web.feeds.news import BasicNewsRecipe
  9.  
  10. class Clarin(BasicNewsRecipe):
  11.     title                 = 'Clarin'
  12.     __author__            = 'Darko Miletic'
  13.     description           = 'Noticias de Argentina y mundo'
  14.     publisher             = 'Grupo Clarin'
  15.     category              = 'news, politics, Argentina'
  16.     oldest_article        = 2
  17.     max_articles_per_feed = 100
  18.     use_embedded_content  = False
  19.     no_stylesheets        = True
  20.     encoding              = 'utf8'
  21.     delay                 = 1
  22.     language              = 'es_AR'
  23.     publication_type      = 'newspaper'
  24.     INDEX                 = 'http://www.clarin.com'
  25.     masthead_url          = 'http://www.clarin.com/static/CLAClarin/images/logo-clarin-print.jpg'
  26.     extra_css             = """ 
  27.                                body{font-family: Arial,Helvetica,sans-serif} 
  28.                                h2{font-family: Georgia,serif; font-size: xx-large} 
  29.                                .hora{font-weight:bold} 
  30.                                .hd p{font-size: small} 
  31.                                .nombre-autor{color: #0F325A} 
  32.                             """
  33.  
  34.     conversion_options = {
  35.                           'comment'  : description
  36.                         , 'tags'     : category
  37.                         , 'publisher': publisher
  38.                         , 'language' : language
  39.                         }
  40.  
  41.     keep_only_tags    = [dict(attrs={'class':['hd','mt']})]
  42.     remove_tags       = [dict(name=['meta','base','link'])]
  43.     remove_attributes = ['lang','_mce_bogus']
  44.  
  45.     feeds = [
  46.                (u'Pagina principal', u'http://www.clarin.com/rss/'             )
  47.               ,(u'Politica'        , u'http://www.clarin.com/rss/politica/'    )
  48.               ,(u'Deportes'        , u'http://www.clarin.com/rss/deportes/'    )
  49.               ,(u'Economia'        , u'http://www.clarin.com/economia/'        )
  50.               ,(u'Mundo'           , u'http://www.clarin.com/rss/mundo/'       )
  51.               ,(u'Espectaculos'    , u'http://www.clarin.com/rss/espectaculos/')
  52.               ,(u'Sociedad'        , u'http://www.clarin.com/rss/sociedad/'    )
  53.               ,(u'Ciudades'        , u'http://www.clarin.com/rss/ciudades/'    )
  54.               ,(u'Policiales'      , u'http://www.clarin.com/rss/policiales/'  )
  55.               ,(u'Internet'        , u'http://www.clarin.com/rss/internet/'    )
  56.               ,(u'Ciudades'        , u'http://www.clarin.com/rss/ciudades/'    )
  57.             ]
  58.  
  59.     
  60.     def get_article_url(self, article):
  61.         return article.get('guid',  None)
  62.     
  63.     def print_version(self, url):
  64.         return url + '?print=1'
  65.  
  66.     def get_cover_url(self):
  67.         cover_url = None
  68.         soup = self.index_to_soup(self.INDEX)
  69.         cover_item = soup.find('div',attrs={'class':'bb-md bb-md-edicion_papel'})
  70.         if cover_item:
  71.            ap = cover_item.find('a',attrs={'href':'/edicion-impresa/'})
  72.            if ap:
  73.               cover_url = self.INDEX + ap.img['src']
  74.         return cover_url
  75.  
  76.