home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / l_espresso.recipe < prev    next >
Encoding:
Text File  |  2011-09-09  |  4.6 KB  |  78 lines

  1. #!/usr/bin/env  python
  2. __license__     = 'GPL v3'
  3. __author__      = 'Lorenzo Vigentini, Gabriele Marini'
  4. __copyright__   = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
  5. __version__     = 'v1.02'
  6. __date__        = '10, January 2010'
  7. __description__ = 'Italian weekly magazine'
  8.  
  9. '''espresso.repubblica.it'''
  10.  
  11. from calibre.web.feeds.news import BasicNewsRecipe
  12.  
  13. class Espresso(BasicNewsRecipe):
  14.     __author__     = 'Lorenzo Vigentini, Gabriele Marini'
  15.     description    = 'Italian weekly magazine'
  16.  
  17.     cover_url      = 'http://espresso.repubblica.it/images/logo_espresso.gif'
  18.     title          = 'L\'Espresso '
  19.     publisher      = 'Gruppo editoriale L\'Espresso'
  20.     category       = 'News, politics, culture, economy, general interest'
  21.  
  22.     language       = 'it'
  23. #    encoding       = 'cp1252'
  24.     timefmt        = '[%a, %d %b, %Y]'
  25.  
  26.     oldest_article        = 16
  27.     max_articles_per_feed = 100
  28.     use_embedded_content  = False
  29.     recursion             = 10
  30.  
  31.     remove_javascript     = True
  32.     no_stylesheets = True
  33.  
  34.  
  35.     feeds          = [
  36.                        (u'Homepage', u'http://kpm.data.kataweb.it/kpm3eolx/rss/home'),
  37.                        (u'Local', u'http://kpm.data.kataweb.it/kpm3eolx/rss/local'),
  38.                        (u'Style & Design', u'http://kpm.data.kataweb.it/kpm3eolx/rss/style_design'),
  39.                        (u'Opinioni', u'http://kpm.data.kataweb.it/kpm3eolx/rss/opinioni'),
  40.                        (u'Rubriche', u'http://kpm.data.kataweb.it/kpm3eolx/rss/rubriche'),
  41.                        (u'Limes', u'http://temi.repubblica.it/limes/feed/'),
  42.                        (u'Chiesa: HomePage', u'http://data.kataweb.it/rss/chiesa/homepage/it'),
  43.                        (u'Chiesa: Speciali e Focus', u'http://data.kataweb.it/rss/chiesa/speciali_e_focus/it')
  44.                     ]
  45.  
  46.  
  47.     def print_version(self,url):
  48.         print url[7:25]
  49.         if url[7:25] == 'temi.repubblica.it':
  50.           return url + '/?printpage=undefined'
  51.         elif url[7:25] == 'www.chiesa.espress':
  52.           return url
  53.         return url + '/&print=true'
  54.  
  55.  
  56.     keep_only_tags     = [
  57.                             dict(name='div', attrs={'class':['testo','copertina','occhiello','firma','didascalia','content-second-right','detail-articles','titolo-local','generic-articles']}),
  58.                             dict(name='div', attrs={'class':['generic-articles','summary','detail-articles']}),
  59.                             dict(name='div', attrs={'id':['content-second-right','content2']})
  60.                           ]
  61.  
  62.     remove_tags        = [
  63.                             dict(name='div',attrs={'class':['servizi','aggiungi','label-web','bottom-mobile','box-abbonamenti','box-cerca','big','little','stampaweb']}),
  64.                             dict(name='div',attrs={'id':['topheader','header','navigation-new','navigation','content-second-left','menutext']}),
  65.                             dict(name='ul',attrs={'id':'user-utility'}),
  66.                             dict(name=['script','noscript','iframe'])
  67.                          ]
  68. #    extra_css = '''
  69. #                h1 {font-family:Times New Roman,"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:24px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:18px;}
  70. #                h2 {font-family:Times New Roman, "Trebuchet MS",Arial,Helvetica,sans-serif; font-size:18px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:16px; }
  71. #                h3 {color:#333333;font-family:Times New Roman, "Trebuchet MS",Arial,Helvetica,sans-serif; font-size:16px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px;}
  72. #                h4 {color:#333333; font-family:Times New Roman, "Trebuchet MS",Arial,Helvetica,sans-serif;font-size:16px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; }
  73. #                h5 {color:#333333; font-family:Times New Roman, "Trebuchet MS",Arial,Helvetica,sans-serif; font-size:12px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; text-transform:uppercase;}
  74. #                .firma {color:#333333;font-family:Times New Roman, "Trebuchet MS",Arial,Helvetica,sans-serif;font-size:12px; font-size-adjust:none; font-stretch:normal; font-style:italic; font-variant:normal; font-weight:bold; line-height:15px; text-decoration:none;}
  75. #                .testo {font-family:Times New Roman, "Trebuchet MS",Arial,Helvetica,sans-serif; font-size:10px;}
  76. #                '''
  77.  
  78.