home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / axxon_magazine.recipe < prev    next >
Text File  |  2011-09-09  |  3KB  |  65 lines

  1. __license__   = 'GPL v3'
  2. __copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
  3. '''
  4. axxon.com.ar
  5. '''
  6. from calibre import strftime
  7. from calibre.web.feeds.news import BasicNewsRecipe
  8.  
  class Axxon_news(BasicNewsRecipe):
      """Calibre news recipe for the magazine 'Revista Axxon' (axxon.com.ar).

      The recipe does not rely on an RSS feed: ``parse_index`` downloads the
      page at ``INDEX`` and collects every link found inside a ``<strong>``
      element whose ``href`` starts with ``?p=``.  All collected articles are
      returned as a single feed.
      """

      # --- Recipe metadata and download settings -------------------------
      title                 = 'Revista Axxon'
      __author__            = 'Darko Miletic'
      description           = 'Axxon, Ciencia Ficcion en Bits'
      publisher             = 'Revista Axxon - Ciencia Ficcion'
      category              = 'SF, Argentina'
      oldest_article        = 31
      delay                 = 1
      max_articles_per_feed = 100
      no_stylesheets        = False
      use_embedded_content  = False
      language              = 'es_AR'
      encoding              = 'utf-8'
      publication_type      = 'magazine'
      # Page that lists the articles; also the base that the relative
      # '?p=...' article links are appended to in parse_index().
      INDEX                 = 'http://axxon.com.ar/rev/'
      extra_css             = ' body{font-family: Verdana,Arial,sans-serif} .editorial{font-family: serif} .posttitle{font-family: "Trebuchet MS","Lucida Grande",Verdana,Arial,sans-serif} .cuento{font-family: "Times New Roman", serif} .biografia{color: red; font-weight: bold; font-family: Verdana,Geneva,Arial,Helvetica,sans-serif} '

      # Metadata forwarded to the conversion step, taken from the
      # class attributes defined above.
      conversion_options = {
          'comment': description,
          'tags': category,
          'publisher': publisher,
          'language': language,
      }

      # --- HTML clean-up rules -------------------------------------------
      keep_only_tags = [dict(name='div', attrs={'class': 'post'})]
      remove_tags = [dict(name=['object', 'link', 'iframe', 'embed', 'img'])]
      remove_tags_after = [
          dict(attrs={'class': ['editorial', 'correo', 'biografia', 'articulo']})
      ]
      remove_attributes = ['width', 'height', 'font', 'border', 'align']

      def parse_index(self):
          """Build the article list from the page at ``INDEX``.

          Every ``<strong>`` element of the index page is inspected; when it
          contains a link whose ``href`` starts with ``?p=``, an article
          entry is created with the link text as title and ``INDEX + href``
          as URL.

          Returns:
              A one-element list ``[(feed_title, articles)]``.  ``feed_title``
              is the text of the index page's ``<head><title>`` element, or
              the recipe's own ``title`` when the page has no usable title.
              ``articles`` is a list of dicts with the keys ``'title'``,
              ``'date'``, ``'url'`` and ``'description'``.
          """
          soup = self.index_to_soup(self.INDEX)

          # Every entry gets the same "now" timestamp, so format it once
          # instead of once per article.
          date = strftime(self.timefmt)

          articles = []
          for item in soup.findAll('strong'):
              feed_link = item.find('a')
              if feed_link is None:
                  continue
              # Tag.get() is available on both old and new BeautifulSoup
              # versions, unlike the deprecated dict-style has_key().
              href = feed_link.get('href') or ''
              if not href.startswith('?p='):
                  continue
              articles.append({
                  'title': self.tag_to_string(feed_link),
                  'date': date,
                  'url': self.INDEX + href,
                  'description': '',
              })

          # Do not crash when the index page lacks <head>/<title> or the
          # title is empty; fall back to the recipe title in that case.
          feed_title = self.title
          head = soup.find('head')
          title_tag = head.find('title') if head is not None else None
          if title_tag is not None and title_tag.string:
              feed_title = title_tag.string

          return [(feed_title, articles)]

      def preprocess_html(self, soup):
          """Strip inline ``style`` attributes from a downloaded page.

          Args:
              soup: Parsed HTML of an article page.

          Returns:
              Whatever the inherited ``adeify_images`` helper returns for the
              cleaned soup.
          """
          for item in soup.findAll(style=True):
              del item['style']
          return self.adeify_images(soup)
  64.  
  65.