home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Chip 2011 November
/
CHIP_2011_11.iso
/
Programy
/
Narzedzia
/
Calibre
/
calibre-0.8.18.msi
/
file_280
/
sportsillustrated_columnists.recipe
< prev
next >
Wrap
Text File
|
2011-09-09
|
3KB
|
61 lines
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup
#from random import randint
from urllib import quote
class SportsIllustratedColumnistsRecipe(BasicNewsRecipe):
    """Calibre recipe that gathers the Sports Illustrated columnist RSS feeds.

    Every article link is redirected to its clickability print page (see
    ``print_version``), and the downloaded page is then reduced to its
    headline and paragraphs (see ``preprocess_html``).
    """

    # Recipe metadata.
    title = u'Sports Illustrated Columnists'
    __author__ = u'kwetal'
    __license__ = u'GPL v3'
    language = 'en'
    version = 2

    # Download settings read by the BasicNewsRecipe base class.
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_javascript = True

    # One (columnist name, feed URL) pair per columnist.
    # RSS sources found at http://sportsillustrated.cnn.com/services/rss/
    feeds = [
        (u'Jon Heyman', u'http://rss.cnn.com/rss/si_jon_heyman.rss'),
        (u'Austin Murphy', u'http://rss.cnn.com/rss/si_austin_murphy.rss'),
        (u'Lars Anderson', u'http://rss.cnn.com/rss/si_lars_anderson.rss'),
        (u'Melissa Segura', u'http://rss.cnn.com/rss/si_melissa_segura.rss'),
        (u'Peter King', u'http://rss.cnn.com/rss/si_peter_king.rss'),
        (u'Scott Wraight', u'http://rss.cnn.com/rss/si_scott_wraight.rss'),
    ]

    def print_version(self, url):
        """Return the print-version address for the article found at ``url``.

        The article URL is percent-quoted and passed as the ``url`` query
        parameter of the clickability "printThis" service.
        """
        # These are the only parameters needed to reach the print version.
        # The site's own Javascript (function PT(), defined in the file
        # 48.js) additionally sends the following, which can be left out:
        #   title  : can be some random string
        #   random : some random number; the number of digits may matter
        #   expire : unknown what value to use
        service = 'http://si.printthis.clickability.com/pt/printThis?clickMap=printThis'
        return service + '&fb=Y&partnerID=2356&url=' + quote(url)

    def preprocess_html(self, soup):
        """Reduce an article page to its headline and paragraphs.

        A page without a ``div.cnnstoryheadline`` element is treated as a
        table of contents and handed back unchanged. Otherwise a new, empty
        document is built and filled with the headline's ``h1`` (when there
        is one) followed by every ``p`` found inside the
        ``td.cnnstorycontentarea`` cells, and that new document is returned.
        """
        headline_box = soup.find('div', attrs={'class': 'cnnstoryheadline'})
        if not headline_box:
            # It's a TOC, just return the whole lot
            return soup

        # It's an article, make a valid content container
        article = BeautifulSoup('<html><head></head><body></body></html>')
        container = article.find('body')

        heading = headline_box.find('h1')
        if heading:
            container.append(heading)

        content_cells = soup.findAll('td', attrs={'class': 'cnnstorycontentarea'})
        for cell in content_cells:
            for paragraph in cell.findAll('p'):
                container.append(paragraph)

        return article