home *** CD-ROM | disk | FTP | other *** search
- #!/usr/bin/env python
- __license__ = 'GPL v3'
- __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
- __docformat__ = 'restructuredtext en'
- '''
- espn.com
- '''
- import re
- from calibre.web.feeds.news import BasicNewsRecipe
- from calibre.ptempfile import TemporaryFile
class ESPN(BasicNewsRecipe):
    '''
    Recipe for espn.com: lists the ESPN RSS feeds, signs in with the
    configured account and rewrites story links to their print pages.
    '''

    # -- Descriptive metadata ---------------------------------------------
    title = 'ESPN'
    description = 'Sports news'
    __author__ = 'Kovid Goyal and Sujata Raman'
    language = 'en'

    # -- Fetch / conversion options ---------------------------------------
    no_stylesheets = True
    use_embedded_content = False
    remove_javascript = True
    # get_browser() below signs in with self.username / self.password.
    needs_subscription = True
    encoding = 'ISO-8859-1'
    remove_tags_before = {'name': 'font', 'attrs': {'class': 'date'}}
    center_navbar = False

    # Tag specifications matched against the page: the footer font block,
    # <hr noshade="noshade"> rules and the DRF horse racing image.
    remove_tags = [
        {'name': 'font', 'attrs': {'class': 'footer'}},
        {'name': 'hr', 'noshade': 'noshade'},
        {'name': 'img', 'src': '/winnercomm/horseracing/DRF.jpg'},
    ]

    extra_css = '''
                body{font-family:Verdana,Arial,Helvetica,sans-serif; font-size:x-small; font-weight:normal;}
                .subhead{color:#666666;font-family:Verdana,sans-serif; font-size:x-small; font-weight:bold;}
                .clearfix{font-family:Verdana,sans-serif; font-size:xx-small; }
                .date{ font-family:Verdana,Arial,Helvetica,sans-serif ; font-size:xx-small;color:#7A7A7A;}
                .byline{ font-family:Verdana,Arial,Helvetica,sans-serif ; font-size:xx-small;color:#666666;}
                .headline{font-family:Verdana,Arial,Helvetica,sans-serif ; font-size:large; font-weight:bold;}
                '''

    # Only the first entry carries an explicit title; every other entry is
    # a bare feed URL.
    feeds = [
        ('Top Headlines', 'http://sports.espn.go.com/espn/rss/news'),
        'http://sports.espn.go.com/espn/rss/nfl/news',
        'http://sports.espn.go.com/espn/rss/nba/news',
        'http://sports.espn.go.com/espn/rss/mlb/news',
        'http://sports.espn.go.com/espn/rss/nhl/news',
        'http://sports.espn.go.com/espn/rss/golf/news',
        'http://sports.espn.go.com/espn/rss/rpm/news',
        'http://sports.espn.go.com/espn/rss/tennis/news',
        'http://sports.espn.go.com/espn/rss/boxing/news',
        'http://soccernet.espn.go.com/rss/news',
        'http://sports.espn.go.com/espn/rss/ncb/news',
        'http://sports.espn.go.com/espn/rss/ncf/news',
        'http://sports.espn.go.com/espn/rss/ncaa/news',
        'http://sports.espn.go.com/espn/rss/outdoors/news',
        # Disabled feed:
        # 'http://sports.espn.go.com/espn/rss/bassmaster/news',
        'http://sports.espn.go.com/espn/rss/oly/news',
        'http://sports.espn.go.com/espn/rss/horse/news',
    ]

    def preprocess_html(self, soup):
        '''
        Blank the inline ``style`` of every ``div`` whose style text
        contains ``px`` and return the same soup object.
        '''
        for block in soup.findAll('div'):
            if not block.has_key('style'):
                continue
            if 'px' in block['style']:
                block['style'] = ''
        return soup

    def postprocess_html(self, soup, first_fetch):
        '''
        Replace ``center`` with ``left`` inside the inline style of every
        ``div`` that has a ``style`` attribute and return the soup.

        ``first_fetch`` is accepted but not used here.
        '''
        for styled in soup.findAll('div', style=True):
            current_style = styled['style']
            styled['style'] = current_style.replace('center', 'left')
        return soup

    def get_browser(self):
        '''
        Return a browser that has submitted the ESPN member login form
        with ``self.username`` and ``self.password``.

        Refresh handling is switched off for the duration of the login and
        switched back on before the browser is returned.
        '''
        # NOTE(review): called on the class without an instance, as in the
        # original; confirm the targeted calibre version exposes
        # get_browser this way.
        br = BasicNewsRecipe.get_browser()
        br.set_handle_refresh(False)

        login_url = 'https://r.espn.go.com/members/v3_1/login'
        page = br.open(login_url).read()
        # Drop the form that contains the "regsigninbtn" element so that the
        # first form left on the page is the one filled in below.
        page = re.sub(r'(?s)<form>.*?id="regsigninbtn".*?</form>', '', page)

        # The trimmed page is written to disk and re-opened locally so the
        # browser parses the edited markup rather than the live page.
        with TemporaryFile(suffix='.htm') as tmp_path:
            with open(tmp_path, 'wb') as out:
                out.write(page)
            br.open_local_file(tmp_path)

        # NOTE(review): the indentation of this copy was lost; the login
        # steps are assumed to follow the temporary-file block - confirm.
        br.form = br.forms().next()
        br.form.find_control(name='username', type='text').value = self.username
        br.form['password'] = self.password
        br.submit().read()
        br.open('http://espn.go.com').read()

        br.set_handle_refresh(True)
        return br

    def get_article_url(self, article):
        '''
        Return the ``guid`` entry of the feed article, or ``None`` when the
        article has none.
        '''
        guid = article.get('guid', None)
        return guid

    def print_version(self, url):
        '''
        Map an article URL to its print-page URL.

        * URLs containing ``eticket``: everything from the first ``&`` on is
          dropped and ``story?`` becomes ``print?``.
        * URLs with ``story?id=<digits>`` that mention neither ``soccernet``
          nor ``bassmaster``: the sports.espn.go.com print URL for that id.
        * ``soccernet`` URLs with ``/id/<digits>/``: the soccernet print URL.

        Anything else (including ``bassmaster`` URLs, whose pass-through
        branch is disabled) yields ``None``.
        '''
        if 'eticket' in url:
            head, _sep, _tail = url.partition('&')
            return head.replace('story?', 'print?')

        on_soccernet = 'soccernet' in url
        story = re.search(r'story\?(id=\d+)', url)
        if story and not on_soccernet and 'bassmaster' not in url:
            return ('http://sports.espn.go.com/espn/print?'
                    + story.group(1) + '&type=story')

        if on_soccernet:
            soccer_story = re.search(r'/id/(\d+)/', url)
            if soccer_story:
                return ('http://soccernet.espn.go.com/print?id=%s&type=story'
                        % soccer_story.group(1))

        return None