home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / welt.recipe < prev    next >
Encoding:
Text File  |  2011-09-09  |  5.8 KB  |  141 lines

  1. __license__   = 'GPL v3'
  2. __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
  3.  
  4. '''
  5. Fetch Weltonline.
  6. '''
  7.  
  8. from calibre.web.feeds.news import BasicNewsRecipe
  9.  
  10.  
  11. class weltDe(BasicNewsRecipe):
  12.  
  13.     title = 'Weltonline'
  14.     description = 'german newspaper'
  15.     language = 'de'
  16.     __author__ = 'Oliver Niesner'
  17.     use_embedded_content   = False
  18.     timefmt = ' [%d %b %Y]'
  19.     max_articles_per_feed = 15
  20.     linearize_tables = True
  21.     no_stylesheets = True
  22.     remove_stylesheets = True
  23.     remove_javascript = True
  24.     encoding = 'utf-8'
  25.     html2epub_options = 'base_font_size=10'
  26.     BasicNewsRecipe.summary_length = 100
  27.  
  28.  
  29.     remove_tags = [dict(id='jumplinks'),
  30.            dict(id='ad1'),
  31.            dict(id='top'),
  32.            dict(id='header'),
  33.            dict(id='additionalNavWrapper'),
  34.            dict(id='fullimage_index'),
  35.            dict(id='additionalNav'),
  36.            dict(id='printMenu'),
  37.            dict(id='topteaser1'),
  38.            dict(id='topteaser2'),
  39.            dict(id='servicesBox'),
  40.            dict(id='servicesNav'),
  41.            dict(id='ad2'),
  42.            dict(id='banner_1'),
  43.            dict(id='ssoInfoTop'),
  44.            dict(id='brandingWrapper'),
  45.            dict(id='links-intern'),
  46.            dict(id='navigation'),
  47.            dict(id='subNav'),
  48.            dict(id='branding'),
  49.            dict(id='servicesNav'),
  50.            dict(id='searchArea'),
  51.            dict(id='servicesBox'),
  52.            dict(id='toggleAdvancedSearch'),
  53.            dict(id='mainNav'),
  54.            dict(id='articleInlineMediaBox0'),
  55.            dict(id='sectionSponsor'),
  56.            dict(id='sprucharea'),
  57.            dict(id='xmsg_recommendEmail'),
  58.            dict(id='xmsg_recommendSms'),
  59.            dict(id='xmsg_comment'),
  60.            dict(id='additionalNavWrapper'),
  61.            dict(id='imagebox'),
  62.            dict(id='footerContainer'),
  63.            #dict(id=''),
  64.                    dict(name='span'),
  65.            dict(name='div', attrs={'class':'printURL'}),
  66.            dict(name='ul', attrs={'class':'clear mainNavigation inline'}),
  67.            dict(name='ul', attrs={'class':'inline'}),
  68.            dict(name='ul', attrs={'class':'ubar'}),
  69.            dict(name='hr', attrs={'class':'ubar'}),
  70.            dict(name='li', attrs={'class':'counter'}),
  71.            dict(name='li', attrs={'class':'browseBack'}),
  72.            dict(name='li', attrs={'class':'browseNext'}),
  73.            dict(name='li', attrs={'class':'selected'}),
  74.            dict(name='div', attrs={'class':'floatLeft'}),
  75.            dict(name='div', attrs={'class':'ad'}),
  76.            dict(name='div', attrs={'class':'ftBarLeft'}),
  77.            dict(name='div', attrs={'class':'clear additionalNav'}),
  78.            dict(name='div', attrs={'class':'inlineBox inlineFurtherLinks'}),
  79.            dict(name='div', attrs={'class':'inlineBox videoInlineBox'}),
  80.            dict(name='div', attrs={'class':'inlineGallery'}),
  81.            dict(name='div', attrs={'class':'ratingBox'}),
  82.            dict(name='div', attrs={'class':'socialBookmarks clear'}),
  83.            dict(name='div', attrs={'class':'articleOptions clear'}),
  84.            dict(name='div', attrs={'class':'noPrint galleryIndex'}),
  85.            dict(name='div', attrs={'class':'inlineBox inlineTagCloud'}),
  86.            dict(name='div', attrs={'class':'clear module imageGalleryBig bgColor1'}),
  87.            dict(name='div', attrs={'class':'clear module writeComment bgColor1'}),
  88.            dict(name='div', attrs={'class':'clear module textGallery bgColor1'}),
  89.            dict(name='div', attrs={'class':'moreArtH3'}),
  90.            dict(name='div', attrs={'class':'jqmWindow'}),
  91.            dict(name='div', attrs={'class':'clear gap4'}),
  92.            dict(name='div', attrs={'class':'hidden'}),
  93.            dict(name='div', attrs={'class':'advertising'}),
  94.            dict(name='div', attrs={'class':'ad adMarginBottom'}),
  95.            dict(name='div', attrs={'class':'ad'}),
  96.            dict(name='div', attrs={'class':'topLine'}),
  97.            dict(name='div', attrs={'class':'toplineH2'}),
  98.            dict(name='div', attrs={'class':'headLineH3'}),
  99.            dict(name='div', attrs={'class':'print'}),
  100.            dict(name='div', attrs={'class':'clear menu'}),
  101.            dict(name='div', attrs={'class':'themenalarm'}),
  102.            dict(name='p', attrs={'class':'jump'}),
  103.            dict(name='a', attrs={'class':'commentLink'}),
  104.            dict(name='h2', attrs={'class':'jumpHeading'}),
  105.            dict(name='ul', attrs={'class':'optionsSubNav clear'}),
  106.            dict(name='li', attrs={'class':'next'}),
  107.            dict(name='li', attrs={'class':'prev'}),
  108.            dict(name='li', attrs={'class':'last'}),
  109.            dict(name='table', attrs={'class':'textGallery'}),
  110.            dict(name='li', attrs={'class':'active'})]
  111.  
  112.     remove_tags_after = [dict(name='div', attrs={'class':'themenalarm'})]
  113.  
  114.     extra_css = '''
  115.                     h2{font-family:Arial,Helvetica,sans-serif; font-size: x-small; color: #003399;}
  116.                     a{font-family:Arial,Helvetica,sans-serif; font-size: x-small; font-style:italic;}
  117.                     .dachzeile p{font-family:Arial,Helvetica,sans-serif; font-size: x-small; }
  118.                     h1{ font-family:Arial,Helvetica,sans-serif;  font-size:x-large; font-weight:bold;}
  119.                     .artikelTeaser{font-family:Arial,Helvetica,sans-serif; font-size: x-small; font-weight:bold; }
  120.                     body{font-family:Arial,Helvetica,sans-serif; }
  121.                     .photo {font-family:Arial,Helvetica,sans-serif; font-size: x-small; color: #666666;}                 '''
  122.  
  123.     feeds =  [ ('Politik', 'http://welt.de/politik/?service=Rss'),
  124.            ('Deutsche Dinge', 'http://www.welt.de/deutsche-dinge/?service=Rss'),
  125.            ('Wirtschaft', 'http://welt.de/wirtschaft/?service=Rss'),
  126.            ('Finanzen', 'http://welt.de/finanzen/?service=Rss'),
  127.            ('Sport', 'http://welt.de/sport/?service=Rss'),
  128.            ('Webwelt', 'http://welt.de/webwelt/?service=Rss'),
  129.            ('Kultur', 'http://welt.de/kultur/?service=Rss'),
  130.            ('Literarische Welt', 'http://welt.de/kultur/literarischewelt/?service=Rss'),
  131.            ('Wissenschaft', 'http://welt.de/wissenschaft/?service=Rss'),
  132.            ('Satire', 'http://welt.de/satire/?service=Rss'),
  133.            ('Motor', 'http://welt.de/motor/?service=Rss'),
  134.            ('Vermischtes', 'http://welt.de/vermischtes/?service=Rss')]
  135.  
  136.  
  137.     def print_version(self, url):
  138.         return url.replace ('.html', '.html?print=true')
  139.  
  140.  
  141.