home *** CD-ROM | disk | FTP | other *** search
import re

from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag


class AdvancedUserRecipe1268409464(BasicNewsRecipe):
    """Calibre news recipe that downloads articles from The Sun's RSS feeds.

    Articles are fetched through their print-friendly URL (see
    ``print_version``), reduced to the ``column-print`` element, and have
    their headline flattened to plain text (see ``preprocess_html``).
    """

    title = u'The Sun'
    __author__ = 'Chaz Ralph'
    description = 'News from The Sun'
    # Skip anything older than one day; cap each feed at 100 articles.
    oldest_article = 1
    max_articles_per_feed = 100
    language = 'en'
    # Ignore the site's own stylesheets; only extra_css below is applied.
    no_stylesheets = True
    extra_css = '.headline {font-size: x-large;} \n .fact { padding-top: 10pt }'
    # Override the charset used to decode downloaded pages.
    encoding = 'iso-8859-1'
    remove_javascript = True

    # Only the element with id "column-print" is kept from each page.
    keep_only_tags = [
        dict(id='column-print'),
    ]

    # Elements stripped from the kept content: <div>s matched by the two
    # class strings below, and every <video> element.
    remove_tags = [
        dict(name='div', attrs={'class': [
            'clear text-center small padding-left-right-5 text-999 padding-top-5 padding-bottom-10 grey-solid-line',
            'clear width-625 bg-fff padding-top-10',
        ]}),
        dict(name='video'),
    ]

    def preprocess_html(self, soup):
        """Replace the first ``<h1>`` with a bare ``<h1>`` holding only its text.

        The original heading's attributes and nested markup are dropped;
        only the string returned by ``self.tag_to_string`` is carried over.
        Pages without an ``<h1>`` are returned unmodified.

        :param soup: parsed article page.
        :return: the same ``soup`` object, modified in place.
        """
        h1 = soup.find('h1')
        if h1 is not None:
            text = self.tag_to_string(h1)
            # NOTE(review): Tag(soup, name) looks like the BeautifulSoup 3
            # constructor form - confirm against the calibre version in use.
            nh = Tag(soup, 'h1')
            nh.insert(0, text)
            h1.replaceWith(nh)

        return soup

    # (section title, RSS feed URL) pairs.
    feeds = [
        (u'News', u'http://www.thesun.co.uk/sol/homepage/feeds/rss/article312900.ece'),
        (u'Sport', u'http://www.thesun.co.uk/sol/homepage/feeds/rss/article247732.ece'),
        (u'Football', u'http://www.thesun.co.uk/sol/homepage/feeds/rss/article247739.ece'),
        (u'Gizmo', u'http://www.thesun.co.uk/sol/homepage/feeds/rss/article247829.ece'),
        (u'Bizarre', u'http://www.thesun.co.uk/sol/homepage/feeds/rss/article247767.ece'),
    ]

    def print_version(self, url):
        """Return the print-friendly URL for an article URL taken from a feed.

        The ``?OTC-RSS&ATTR=<letters and hyphens>`` query carried by the
        feed links is replaced with ``?print=yes``. A URL that does not
        contain that query is returned unchanged.

        :param url: article URL as found in the RSS feed.
        :return: URL of the article's print version.
        """
        return re.sub(r'\?OTC-RSS&ATTR=[-a-zA-Z]+', '?print=yes', url)
-
-
-