home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / sueddeutsche.recipe < prev    next >
Text File  |  2011-09-09  |  6KB  |  83 lines

  1. # -*- coding: utf-8 -*-
  2. __license__   = 'GPL v3'
  3. __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
  4.  
  5. '''
  6. Fetch sueddeutsche.
  7. '''
  8. from calibre.web.feeds.news import BasicNewsRecipe
  9.  
  10.  
  11. class Sueddeutsche(BasicNewsRecipe):
  12.  
  13.     title = u'S├╝ddeutsche'
  14.     description = 'News from Germany'
  15.     __author__ = 'Oliver Niesner and Armin Geller'
  16.     use_embedded_content   = False
  17.     timefmt = ' [%d %b %Y]'
  18.     oldest_article = 7
  19.     max_articles_per_feed = 50
  20.     no_stylesheets = True
  21.     language = 'de'
  22.  
  23.     encoding = 'utf-8'
  24.     remove_javascript = True
  25.  
  26.  
  27.     remove_tags = [ dict(name='link'), dict(name='iframe'),
  28.                     dict(name='div', attrs={'id':["bookmarking","themenbox","artikelfoot","CAD_AD",
  29.                           "SKY_AD","NT1_AD","navbar1","sdesiteheader"]}),
  30.  
  31.                     dict(name='div', attrs={'class':["similar-article-box","artikelliste","nteaser301bg",
  32.                                  "pages closed","basebox right narrow","headslot galleried"]}),
  33.  
  34.                     dict(name='div', attrs={'class':["articleDistractor","listHeader","listHeader2","hr2",
  35.                              "item","videoBigButton","articlefooter full-column",
  36.                                                      "bildbanderolle full-column","footerCopy padleft5"]}),
  37.  
  38.                     dict(name='p', attrs={'class':["ressortartikeln","artikelFliestext","entry-summary"]}),
  39.                     dict(name='div', attrs={'style':["position:relative;"]}),
  40.                     dict(name='span', attrs={'class':["nlinkheaderteaserschwarz","artikelLink","r10000000"]}),
  41.                     dict(name='table', attrs={'class':["stoerBS","kommentare","footer","pageBoxBot","pageAktiv","bgcontent"]}),
  42.                     dict(name='ul', attrs={'class':["breadcrumb","articles","activities","sitenav","actions"]}),
  43.                     dict(name='td', attrs={'class':["artikelDruckenRight"]}),
  44.                     dict(name='p', text = "ANZEIGE")
  45.                      ]
  46.     remove_tags_after = [dict(name='div', attrs={'class':["themenbox full-column"]})]
  47.  
  48.     extra_css = '''
  49.                     h2{font-family:Arial,Helvetica,sans-serif; font-size: x-small; color: #003399;}
  50.                     a{font-family:Arial,Helvetica,sans-serif; font-size: x-small; font-style:italic;}
  51.                     .dachzeile p{font-family:Arial,Helvetica,sans-serif; font-size: x-small; }
  52.                     h1{ font-family:Arial,Helvetica,sans-serif;  font-size:x-large; font-weight:bold;}
  53.                     .artikelTeaser{font-family:Arial,Helvetica,sans-serif; font-size: x-small; font-weight:bold; }
  54.                     body{font-family:Arial,Helvetica,sans-serif; }
  55.                     .photo {font-family:Arial,Helvetica,sans-serif; font-size: x-small; color: #666666;}                 '''
  56.  
  57.     feeds = [
  58.               (u'Politik', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EPolitik%24?output=rss'),
  59.               (u'Wirtschaft', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EWirtschaft%24?output=rss'),
  60.               (u'Geld', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EGeld%24?output=rss'),
  61.               (u'Kultur', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EKultur%24?output=rss'),
  62.               (u'Sport', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ESport%24?output=rss'),
  63.               (u'Leben', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ELeben%24?output=rss'),
  64.               (u'Karriere', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EKarriere%24?output=rss'),
  65.               (u'M├╝nchen&Region', u'http://www.sueddeutsche.de/app/service/rss/ressort/muenchen/rss.xml'),
  66.               (u'Bayern', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EBayern%24?output=rss'),
  67.               (u'Medien', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EMedien%24?output=rss'),
  68.               (u'Digital', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EDigital%24?output=rss'),
  69.               (u'Auto', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EAuto%24?output=rss'),
  70.               (u'Wissen', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EWissen%24?output=rss'),
  71.               (u'Panorama', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EPanorama%24?output=rss'),
  72.               (u'Reise', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EReise%24?output=rss'),
  73.               (u'Technik', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5ETechnik%24?output=rss'), # sometimes only
  74.               (u'Macht', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EMacht%24?output=rss'),     # sometimes only
  75.               (u'Job', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EJob%24?output=rss'),         # sometimes only
  76.               (u'Service', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EService%24?output=rss'), # sometimes only
  77.               (u'Verlag', u'http://suche.sueddeutsche.de/query/%23/sort/-docdatetime/drilldown/%C2%A7ressort%3A%5EVerlag%24?output=rss'),   # sometimes only
  78.              ]
  79.  
  80.     def print_version(self, url):
  81.         main, sep, id = url.rpartition('/')
  82.         return main + '/2.220/' + id
  83.