home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / cosmopolitan.recipe < prev    next >
Encoding:
Text File  |  2011-09-09  |  2.7 KB  |  70 lines

  1. #!/usr/bin/env  python
  2.  
  3. __license__   = 'GPL v3'
  4. __author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
  5. '''
  6. Cosmopolitan (Spanish edition) - www.cosmohispano.com
  7. '''
  8.  
  9. from calibre.web.feeds.news import BasicNewsRecipe
  10.  
  11. class General(BasicNewsRecipe):
  12.     title                 = 'Cosmopolitan'
  13.     __author__            = 'Gustavo Azambuja'
  14.     description           = 'Revista Cosmopolitan, Edicion Espanola'
  15.     language       = 'es'
  16.     timefmt        = '[%a, %d %b, %Y]'
  17.     use_embedded_content  = False
  18.     recursion             = 1
  19.     encoding = 'utf8'
  20.     remove_javascript = True
  21.     no_stylesheets = True
  22.     conversion_options = {'linearize_tables': True}
  23.  
  24.     oldest_article        = 180
  25.     max_articles_per_feed = 100
  26.     keep_only_tags = [
  27.              dict(id=['contenido']),
  28.              dict(name='td', attrs={'class':['contentheading', 'txt_articulo']})
  29.                      ]
  30.     remove_tags = [
  31.              dict(name='div', attrs={'class':['breadcrumb', 'bloque1', 'article', 'bajo_title', 'tags_articles', 'otrosenlaces_title', 'otrosenlaces_parent', 'compartir']}),
  32.              dict(name='div', attrs={'id':'comment'}),
  33.              dict(name='table', attrs={'class':'pagenav'}),
  34.              dict(name=['object','link'])
  35.                   ]
  36.     remove_attributes = ['width','height', 'style', 'font', 'color']
  37.  
  38.     extra_css = '''
  39.                 h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
  40.                 h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
  41.                 h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
  42.                 img {float:left; clear:both; margin:10px}
  43.                 p {font-family:Arial,Helvetica,sans-serif;}
  44.                 '''
  45.     feeds = [
  46.                   (u'Articulos', u'http://feeds.feedburner.com/cosmohispano/FSSt')
  47.     ]
  48.  
  49.     def preprocess_html(self, soup):
  50.         attribs = [  'style','font','valign'
  51.                     ,'colspan','width','height'
  52.                     ,'rowspan','summary','align'
  53.                     ,'cellspacing','cellpadding'
  54.                     ,'frames','rules','border'
  55.                   ]
  56.         for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
  57.             item.name = 'div'
  58.             for attrib in attribs:
  59.                 if item.has_key(attrib):
  60.                    del item[attrib]
  61.         return soup
  62.  
  63.     def get_cover_url(self):
  64.         index = 'http://www.cosmohispano.com/revista'
  65.         soup = self.index_to_soup(index)
  66.         link_item = soup.find('img',attrs={'class':'img_portada'})
  67.         if link_item:
  68.             cover_url = "http://www.cosmohispano.com"+link_item['src']
  69.         return cover_url
  70.