home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / elargentino.recipe < prev    next >
Text File  |  2011-09-09  |  3KB  |  63 lines

  1. #!/usr/bin/env  python
  2.  
  3. __license__   = 'GPL v3'
  4. __copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
  5. '''
  6. elargentino.com
  7. '''
  8. from calibre.web.feeds.news import BasicNewsRecipe
  9.  
  10. class ElArgentino(BasicNewsRecipe):
  11.     title                 = 'ElArgentino.com'
  12.     __author__            = 'Darko Miletic'
  13.     description           = 'Informacion Libre las 24 horas'
  14.     publisher             = 'ElArgentino.com'
  15.     category              = 'news, politics, Argentina'
  16.     oldest_article        = 2
  17.     max_articles_per_feed = 100
  18.     remove_javascript     = True
  19.     no_stylesheets        = True
  20.     use_embedded_content  = False
  21.     encoding              = 'utf8'
  22.     cover_url             = 'http://www.elargentino.com/TemplateWeb/MediosFooter/tapa_elargentino.png'
  23.     language = 'es_AR'
  24.  
  25.  
  26.     html2lrf_options = [
  27.                           '--comment', description
  28.                         , '--category', category
  29.                         , '--publisher', publisher
  30.                         ]
  31.  
  32.     html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
  33.  
  34.     remove_tags = [
  35.                      dict(name='div', attrs={'id':'noprint'              })
  36.                     ,dict(name='div', attrs={'class':'encabezadoImprimir'})
  37.                     ,dict(name='a'  , attrs={'target':'_blank'           })
  38.                   ]
  39.  
  40.     feeds = [
  41.               (u'Portada'     , u'http://www.elargentino.com/Highlights.aspx?Content-Type=text/xml&ChannelDesc=Home'                                             )
  42.              ,(u'Pais'        , u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=112&Content-Type=text/xml&ChannelDesc=Pa%C3%ADs'        )
  43.              ,(u'Economia'    , u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=107&Content-Type=text/xml&ChannelDesc=Econom%C3%ADa'    )
  44.              ,(u'Mundo'       , u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=113&Content-Type=text/xml&ChannelDesc=Mundo'            )
  45.              ,(u'Tecnologia'  , u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=118&Content-Type=text/xml&ChannelDesc=Tecnolog%C3%ADa'  )
  46.              ,(u'Espectaculos', u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=114&Content-Type=text/xml&ChannelDesc=Espect%C3%A1culos')
  47.              ,(u'Deportes'    , u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=106&Content-Type=text/xml&ChannelDesc=Deportes'         )
  48.              ,(u'Sociedad'    , u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=109&Content-Type=text/xml&ChannelDesc=Sociedad'         )
  49.              ,(u'Entrevistas' , u'http://www.elargentino.com/Highlights.aspx?ParentType=Section&ParentId=115&Content-Type=text/xml&ChannelDesc=Entrevistas'      )
  50.             ]
  51.  
  52.     def print_version(self, url):
  53.         main, sep, article_part = url.partition('/nota-')
  54.         article_id, rsep, rrest = article_part.partition('-')
  55.         return u'http://www.elargentino.com/Impresion.aspx?Id=' + article_id
  56.  
  57.     def preprocess_html(self, soup):
  58.         mtag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n<meta http-equiv="Content-Language" content="es-AR"/>\n'
  59.         soup.head.insert(0,mtag)
  60.         for item in soup.findAll(style=True):
  61.             del item['style']
  62.         return soup
  63.