home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / adevarul.recipe < prev    next >
Encoding:
Text File  |  2011-09-09  |  2.0 KB  |  60 lines

  1. # -*- coding: utf-8 -*-
  2. #!/usr/bin/env  python
  3.  
  4. __license__   = 'GPL v3'
  5. __copyright__ = u'2011, Silviu Cotoar\u0103'
  6. '''
  7. adevarul.ro
  8. '''
  9.  
  10. from calibre.web.feeds.news import BasicNewsRecipe
  11.  
  class Adevarul(BasicNewsRecipe):
      """Recipe configuration for the adevarul.ro news feed.

      The class consists of declarative attributes plus one hook,
      ``preprocess_html``.  The attributes are presumably consumed by
      ``BasicNewsRecipe`` (imported from calibre, not defined in this
      file) -- see calibre's recipe API for their exact meaning.
      """

      # --- Publication metadata -------------------------------------------
      title = u'Adev\u0103rul'
      __author__ = u'Silviu Cotoar\u0103'
      description = u'\u0218tiri din Rom\u00e2nia'
      publisher = 'Adevarul'
      category = 'Ziare,Stiri,Romania'
      language = 'ro'
      cover_url = 'http://upload.wikimedia.org/wikipedia/en/d/d6/Logo_noul_adevarul.png'

      # --- Download / clean-up switches -----------------------------------
      oldest_article = 5
      max_articles_per_feed = 100
      use_embedded_content = False
      encoding = 'utf-8'
      no_stylesheets = True
      remove_javascript = True

      # Reuses the metadata values defined above, so it must come after them.
      conversion_options = dict(
          comments=description,
          tags=category,
          language=language,
          publisher=publisher,
      )

      # Tag matchers for the article header and article body containers.
      keep_only_tags = [
          {'name': 'div', 'attrs': {'class': 'article_header'}},
          {'name': 'div', 'attrs': {'class': 'bb-tu first-t bb-article-body'}},
      ]

      # Tag matchers for byline items, related attachments, comment widgets
      # and other page furniture.
      remove_tags = [
          {'name': 'li', 'attrs': {'class': 'author'}},
          {'name': 'li', 'attrs': {'class': 'date'}},
          {'name': 'li', 'attrs': {'class': 'comments'}},
          {'name': 'div', 'attrs': {'class': 'bb-wg-article_related_attachements'}},
          {'name': 'div', 'attrs': {'class': 'bb-md bb-md-article_comments'}},
          {'name': 'form', 'attrs': {'id': 'bb-comment-create-form'}},
          {'name': 'div', 'attrs': {'id': 'mediatag'}},
          {'name': 'div', 'attrs': {'id': 'ft'}},
          {'name': 'div', 'attrs': {'id': 'comment_wrapper'}},
      ]

      # The comment wrapper is also listed in remove_tags above.
      remove_tags_after = [
          {'name': 'div', 'attrs': {'id': 'comment_wrapper'}},
      ]

      # A single (title, url) feed entry.
      feeds = [
          (u'\u0218tiri', u'http://www.adevarul.ro/rss/latest'),
      ]

      def preprocess_html(self, soup):
          """Return ``soup`` after passing it through ``self.adeify_images``.

          ``adeify_images`` is not defined in this class; presumably it is
          inherited from ``BasicNewsRecipe`` -- confirm against calibre.
          """
          adeified = self.adeify_images(soup)
          return adeified
  59.  
  60.