home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / express_de.recipe < prev    next >
Text File  |  2011-09-09  |  4KB  |  73 lines

  1. from calibre.web.feeds.news import BasicNewsRecipe
  2. class AdvancedUserRecipe1303841067(BasicNewsRecipe):
  3.  
  4.     title          = u'Express.de'
  5.     __author__  = 'schuster'
  6.     oldest_article = 2
  7.     max_articles_per_feed = 50
  8.     no_stylesheets         = True
  9.     use_embedded_content   = False
  10.     language               = 'de'
  11.     extra_css = '''
  12.                     h2{font-family:Arial,Helvetica,sans-serif; font-size: x-small;}
  13.                     h1{ font-family:Arial,Helvetica,sans-serif;  font-size:x-large; font-weight:bold;}
  14.                 '''
  15.     remove_javascript      = True
  16.     remove_tags_befor = [dict(name='div', attrs={'class':'Datum'})]
  17.     remove_tags_after = [dict(name='div', attrs={'class':'MoreNews'})]
  18.  
  19.     remove_tags = [dict(id='kalaydo'),
  20.            dict(id='Header'),
  21.            dict(id='Searchline'),
  22.            dict(id='MainNav'),
  23.            dict(id='Logo'),
  24.            dict(id='MainLinkSpacer'),
  25.            dict(id='MainLinks'),
  26.            dict(id='ContainerPfad'), #neu
  27.            dict(title='Diese Seite Bookmarken'),
  28.  
  29.                                            dict(name='span'),
  30.            dict(name='div', attrs={'class':'spacer_leftneu'}),
  31.            dict(name='div', attrs={'class':'button kalaydologo'}),
  32.            dict(name='div', attrs={'class':'button stellenneu'}),
  33.            dict(name='div', attrs={'class':'button autoneu'}),
  34.            dict(name='div', attrs={'class':'button immobilienneu'}),
  35.            dict(name='div', attrs={'class':'button kleinanzeigen'}),
  36.            dict(name='div', attrs={'class':'button tiereneu'}),
  37.            dict(name='div', attrs={'class':'button ferienwohnungen'}),
  38.            dict(name='div', attrs={'class':'button inserierenneu'}),
  39.            dict(name='div', attrs={'class':'spacer_rightneu'}),
  40.            dict(name='div', attrs={'class':'spacer_rightcorner'}),
  41.            dict(name='div', attrs={'class':'HeaderMetaNav'}),
  42.            dict(name='div', attrs={'class':'HeaderSearchOption'}),
  43.            dict(name='div', attrs={'class':'HeaderSearch'}),
  44.            dict(name='div', attrs={'class':'sbutton'}),
  45.            dict(name='div', attrs={'class':'active'}),
  46.            dict(name='div', attrs={'class':'MoreNews'}), #neu
  47.            dict(name='div', attrs={'class':'ContentBoxSubline'}) #neu
  48. ]
  49.  
  50.  
  51.     def preprocess_html(self, soup):
  52.         for alink in soup.findAll('a'):
  53.             if alink.string is not None:
  54.                tstr = alink.string
  55.                alink.replaceWith(tstr)
  56.         return soup
  57.  
  58.     feeds          = [(u'Top-Themen', u'http://www.express.de/home/-/2126/2126/-/view/asFeed/-/index.xml'),
  59.                           (u'Regional - K├╢ln', u'http://www.express.de/regional/koeln/-/2856/2856/-/view/asFeed/-/index.xml'),
  60.                           (u'Regional - Bonn', u'http://www.express.de/regional/bonn/-/2860/2860/-/view/asFeed/-/index.xml'),
  61.                           (u'Regional - D├╝sseldorf', u'http://www.express.de/regional/duesseldorf/-/2858/2858/-/view/asFeed/-/index.xml'),
  62.                           (u'Regional - Region', u'http://www.express.de/regional/-/2178/2178/-/view/asFeed/-/index.xml'),
  63.                           (u'Sport-News', u'http://www.express.de/sport/-/2176/2176/-/view/asFeed/-/index.xml'),
  64.                           (u'Fussball-News', u'http://www.express.de/sport/fussball/-/3186/3186/-/view/asFeed/-/index.xml'),
  65.                           (u'1.FC K├╢ln News', u'http://www.express.de/sport/fussball/fc-koeln/-/3192/3192/-/view/asFeed/-/index.xml'),
  66.                           (u'Alemannia Aachen News', u'http://www.express.de/sport/fussball/alemannia/-/3290/3290/-/view/asFeed/-/index.xml'),
  67.                           (u'Borussia M~Gladbach', u'http://www.express.de/sport/fussball/gladbach/-/3286/3286/-/view/asFeed/-/index.xml'),
  68.                           (u'Fortuna D~Dorf', u'http://www.express.de/sport/fussball/fortuna/-/3292/3292/-/view/asFeed/-/index.xml'),
  69.                           (u'Basketball News', u'http://www.express.de/sport/basketball/-/3190/3190/-/view/asFeed/-/index.xml'),
  70.                           (u'Big Brother', u'http://www.express.de/news/promi-show/big-brother/-/2402/2402/-/view/asFeed/-/index.xml'),
  71.                     ]
  72.  
  73.