home *** CD-ROM | disk | FTP | other *** search
- __license__ = 'GPL v3'
- __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
-
- '''
- Fetch Weltonline.
- '''
-
- from calibre.web.feeds.news import BasicNewsRecipe
-
-
class weltDe(BasicNewsRecipe):
    """Calibre news recipe for Welt Online (welt.de).

    The recipe is declarative: the class attributes below list the RSS
    feeds to read, the page elements to strip from each article and the
    CSS applied to the result. The only code is :meth:`print_version`,
    which rewrites article URLs to their ``?print=true`` variant.
    """

    title = 'Weltonline'
    description = 'german newspaper'
    language = 'de'
    __author__ = 'Oliver Niesner'
    use_embedded_content = False
    timefmt = ' [%d %b %Y]'
    max_articles_per_feed = 15
    linearize_tables = True
    no_stylesheets = True
    # NOTE(review): `no_stylesheets` above is the documented option;
    # `remove_stylesheets` looks like a redundant leftover. Kept so the
    # class exposes the same attributes as before -- confirm, then drop.
    remove_stylesheets = True
    remove_javascript = True
    encoding = 'utf-8'
    # NOTE(review): presumably a legacy conversion hint; kept unchanged.
    html2epub_options = 'base_font_size=10'
    # Set on this recipe only. The previous code assigned
    # `BasicNewsRecipe.summary_length`, which rebinds the attribute on the
    # shared base class and therefore changes it for every other recipe
    # living in the same process.
    summary_length = 100

    # Page elements that are stripped from every downloaded article.
    # Duplicate rules from the earlier revision were collapsed; each rule
    # appears once, in order of first appearance.
    remove_tags = [
        # --- matched by element id ---
        dict(id='jumplinks'),
        dict(id='ad1'),
        dict(id='top'),
        dict(id='header'),
        dict(id='additionalNavWrapper'),
        dict(id='fullimage_index'),
        dict(id='additionalNav'),
        dict(id='printMenu'),
        dict(id='topteaser1'),
        dict(id='topteaser2'),
        dict(id='servicesBox'),
        dict(id='servicesNav'),
        dict(id='ad2'),
        dict(id='banner_1'),
        dict(id='ssoInfoTop'),
        dict(id='brandingWrapper'),
        dict(id='links-intern'),
        dict(id='navigation'),
        dict(id='subNav'),
        dict(id='branding'),
        dict(id='searchArea'),
        dict(id='toggleAdvancedSearch'),
        dict(id='mainNav'),
        dict(id='articleInlineMediaBox0'),
        dict(id='sectionSponsor'),
        dict(id='sprucharea'),
        dict(id='xmsg_recommendEmail'),
        dict(id='xmsg_recommendSms'),
        dict(id='xmsg_comment'),
        dict(id='imagebox'),
        dict(id='footerContainer'),
        # --- matched by tag name only: every <span> is dropped ---
        dict(name='span'),
        # --- matched by tag name + exact class attribute value ---
        dict(name='div', attrs={'class': 'printURL'}),
        dict(name='ul', attrs={'class': 'clear mainNavigation inline'}),
        dict(name='ul', attrs={'class': 'inline'}),
        dict(name='ul', attrs={'class': 'ubar'}),
        dict(name='hr', attrs={'class': 'ubar'}),
        dict(name='li', attrs={'class': 'counter'}),
        dict(name='li', attrs={'class': 'browseBack'}),
        dict(name='li', attrs={'class': 'browseNext'}),
        dict(name='li', attrs={'class': 'selected'}),
        dict(name='div', attrs={'class': 'floatLeft'}),
        dict(name='div', attrs={'class': 'ad'}),
        dict(name='div', attrs={'class': 'ftBarLeft'}),
        dict(name='div', attrs={'class': 'clear additionalNav'}),
        dict(name='div', attrs={'class': 'inlineBox inlineFurtherLinks'}),
        dict(name='div', attrs={'class': 'inlineBox videoInlineBox'}),
        dict(name='div', attrs={'class': 'inlineGallery'}),
        dict(name='div', attrs={'class': 'ratingBox'}),
        dict(name='div', attrs={'class': 'socialBookmarks clear'}),
        dict(name='div', attrs={'class': 'articleOptions clear'}),
        dict(name='div', attrs={'class': 'noPrint galleryIndex'}),
        dict(name='div', attrs={'class': 'inlineBox inlineTagCloud'}),
        dict(name='div', attrs={'class': 'clear module imageGalleryBig bgColor1'}),
        dict(name='div', attrs={'class': 'clear module writeComment bgColor1'}),
        dict(name='div', attrs={'class': 'clear module textGallery bgColor1'}),
        dict(name='div', attrs={'class': 'moreArtH3'}),
        dict(name='div', attrs={'class': 'jqmWindow'}),
        dict(name='div', attrs={'class': 'clear gap4'}),
        dict(name='div', attrs={'class': 'hidden'}),
        dict(name='div', attrs={'class': 'advertising'}),
        dict(name='div', attrs={'class': 'ad adMarginBottom'}),
        dict(name='div', attrs={'class': 'topLine'}),
        dict(name='div', attrs={'class': 'toplineH2'}),
        dict(name='div', attrs={'class': 'headLineH3'}),
        dict(name='div', attrs={'class': 'print'}),
        dict(name='div', attrs={'class': 'clear menu'}),
        dict(name='div', attrs={'class': 'themenalarm'}),
        dict(name='p', attrs={'class': 'jump'}),
        dict(name='a', attrs={'class': 'commentLink'}),
        dict(name='h2', attrs={'class': 'jumpHeading'}),
        dict(name='ul', attrs={'class': 'optionsSubNav clear'}),
        dict(name='li', attrs={'class': 'next'}),
        dict(name='li', attrs={'class': 'prev'}),
        dict(name='li', attrs={'class': 'last'}),
        dict(name='table', attrs={'class': 'textGallery'}),
        dict(name='li', attrs={'class': 'active'}),
    ]

    # Cut-off marker: the "themenalarm" box (also listed in remove_tags).
    remove_tags_after = [dict(name='div', attrs={'class': 'themenalarm'})]

    # Stylesheet applied to the generated articles.
    extra_css = '''
        h2{font-family:Arial,Helvetica,sans-serif; font-size: x-small; color: #003399;}
        a{font-family:Arial,Helvetica,sans-serif; font-size: x-small; font-style:italic;}
        .dachzeile p{font-family:Arial,Helvetica,sans-serif; font-size: x-small; }
        h1{ font-family:Arial,Helvetica,sans-serif; font-size:x-large; font-weight:bold;}
        .artikelTeaser{font-family:Arial,Helvetica,sans-serif; font-size: x-small; font-weight:bold; }
        body{font-family:Arial,Helvetica,sans-serif; }
        .photo {font-family:Arial,Helvetica,sans-serif; font-size: x-small; color: #666666;} '''

    # (section title, RSS feed URL) pairs, one per newspaper section.
    feeds = [
        ('Politik', 'http://welt.de/politik/?service=Rss'),
        ('Deutsche Dinge', 'http://www.welt.de/deutsche-dinge/?service=Rss'),
        ('Wirtschaft', 'http://welt.de/wirtschaft/?service=Rss'),
        ('Finanzen', 'http://welt.de/finanzen/?service=Rss'),
        ('Sport', 'http://welt.de/sport/?service=Rss'),
        ('Webwelt', 'http://welt.de/webwelt/?service=Rss'),
        ('Kultur', 'http://welt.de/kultur/?service=Rss'),
        ('Literarische Welt', 'http://welt.de/kultur/literarischewelt/?service=Rss'),
        ('Wissenschaft', 'http://welt.de/wissenschaft/?service=Rss'),
        ('Satire', 'http://welt.de/satire/?service=Rss'),
        ('Motor', 'http://welt.de/motor/?service=Rss'),
        ('Vermischtes', 'http://welt.de/vermischtes/?service=Rss'),
    ]

    def print_version(self, url):
        """Return the printer-friendly variant of an article URL.

        ``?print=true`` is inserted after every ``.html`` found in
        ``url``; a URL that contains no ``.html`` is returned unchanged.
        """
        return url.replace('.html', '.html?print=true')
-
-
-