home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / kompas.recipe < prev    next >
Text File  |  2011-09-09  |  4KB  |  78 lines

  1. #!/usr/bin/env  python
  2. __license__   = 'GPL v3'
  3. __copyright__ = '2011, Adrian Gunawan <agunawan at adrnalin.com>'
  4. __author__    = 'Adrian Gunawan'
  5. __version__   = 'v1.0'
  6. __date__      = '02 February 2011'
  7.  
  8. '''
  9. http://www.kompas.com/
  10. '''
  11.  
  12. import re
  13. from calibre.web.feeds.news import BasicNewsRecipe
  14.  
  15. class Kompas(BasicNewsRecipe):
  16.     title          = u'Kompas'
  17.     masthead_url   = 'http://stat.k.kidsklik.com/data/2k10/kompascom2011/images/logo_kompas.png'
  18.     cover_url   = 'http://stat.k.kidsklik.com/data/2k10/kompascom2011/images/logo_kompas.png'
  19.  
  20.     __author__     = u'Adrian Gunawan'
  21.     description    = u'Indonesian News from Kompas Online Edition'
  22.     category       = 'local news, international, business, Indonesia'
  23.     language       = 'id'
  24.     oldest_article = 5
  25.     max_articles_per_feed = 100
  26.  
  27.     no_stylesheets        = True
  28.     use_embedded_content  = False
  29.     no_javascript         = True
  30.     remove_empty_feeds    = True
  31.  
  32.     timefmt               = ' [%A, %d %B, %Y]'
  33.     encoding              = 'utf-8'
  34.  
  35.     keep_only_tags = [dict(name='div', attrs ={'class':'content_kiri_detail'})]
  36.  
  37.     extra_css = '''
  38.                   h1{font-family:Georgia,"Times New Roman",Times,serif; font-weight:bold; font-size:large;}
  39.                   .cT-storyDetails{font-family:Arial,Helvetica,sans-serif; color:#666666;font-size:x-small;}
  40.                   .articleBody{font-family:Arial,Helvetica,sans-serif; color:black;font-size:small;}
  41.                   .cT-imageLandscape{font-family:Arial,Helvetica,sans-serif; color:#333333 ;font-size:x-small;}
  42.                   .source{font-family:Arial,Helvetica,sans-serif; color:#333333 ;font-size:xx-small;}
  43.                   #content{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
  44.                   .pageprint{font-family:Arial,Helvetica,sans-serif;font-size:small;}
  45.                   #bylineDetails{font-family:Arial,Helvetica,sans-serif; color:#666666;font-size:x-small;}
  46.                   .featurePic-wide{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
  47.                   #idfeaturepic{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
  48.                   h3{font-family:Georgia,"Times New Roman",Times,serif; font-size:small;}
  49.                   h2{font-family:Georgia,"Times New Roman",Times,serif; font-size:small;}
  50.                   h4{font-family:Georgia,"Times New Roman",Times,serif; font-size:small;}
  51.                   h5{font-family:Georgia,"Times New Roman",Times,serif; font-size:small;}
  52.                   body{font-family:Arial,Helvetica,sans-serif; font-size:x-small;}
  53.                 '''
  54.  
  55.     remove_tags     = [
  56.                         dict(name='div', attrs ={'class':['c_biru_kompas2011', 'c_abu01_kompas2011', 'c_abu_01_kompas2011', 'right', 'clearit']}),
  57.                         dict(name='div', attrs ={'id':['comment_list', 'comment_paging', 'share']}),
  58.                         dict(name='form'),
  59.                         dict(name='ul'),
  60.                        ]
  61.  
  62.     preprocess_regexps = [
  63.                           (re.compile(r'<!--TERKAIT -->.*<!--TERKAIT END -->', re.DOTALL|re.IGNORECASE),lambda match: ''),
  64.                           (re.compile(r'<strong>Sent Using.*</body>', re.DOTALL|re.IGNORECASE),lambda match: ''),
  65.                           (re.compile(r'<strong>Kirim Komentar Anda</strong>', re.DOTALL|re.IGNORECASE),lambda match: ''),
  66.                           (re.compile(r'<a[^>]*>Kembali ke Index Topik Pilihan</a>', re.DOTALL|re.IGNORECASE),lambda match: ''),
  67.                          ]
  68.  
  69.     feeds          = [
  70.                       (u'Nasional', u'http://www.kompas.com/getrss/nasional'),
  71.                       (u'Regional', u'http://www.kompas.com/getrss/regional'),
  72.                       (u'Internasional', u'http://www.kompas.com/getrss/internasional'),
  73.                       (u'Megapolitan', u'http://www.kompas.com/getrss/megapolitan'),
  74.                       (u'Bisnis Keuangan', u'http://www.kompas.com/getrss/bisniskeuangan'),
  75.                       (u'Kesehatan', u'http://www.kompas.com/getrss/kesehatan'),
  76.                       (u'Olahraga', u'http://www.kompas.com/getrss/olahraga'),
  77.                       ]
  78.