home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / espn.recipe < prev    next >
Encoding:
Text File  |  2011-09-09  |  4.4 KB  |  122 lines

  1. #!/usr/bin/env  python
  2. __license__   = 'GPL v3'
  3. __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
  4. __docformat__ = 'restructuredtext en'
  5.  
  6. '''
  7. espn.com
  8. '''
  9. import re
  10. from calibre.web.feeds.news import BasicNewsRecipe
  11. from calibre.ptempfile import TemporaryFile
  12.  
  13. class ESPN(BasicNewsRecipe):
  14.  
  15.     title       = 'ESPN'
  16.     description = 'Sports news'
  17.     __author__  = 'Kovid Goyal and Sujata Raman'
  18.     language = 'en'
  19.     no_stylesheets = True
  20.  
  21.     use_embedded_content = False
  22.     remove_javascript     = True
  23.     needs_subscription = True
  24.     encoding= 'ISO-8859-1'
  25.  
  26.     remove_tags_before = dict(name='font', attrs={'class':'date'})
  27.     center_navbar = False
  28.     remove_tags = [
  29.                     dict(name='font', attrs={'class':'footer'}), dict(name='hr', noshade='noshade'),
  30.                     dict(name = 'img', src ='/winnercomm/horseracing/DRF.jpg')
  31.                    ]
  32.  
  33.  
  34.     extra_css = '''
  35.                 body{font-family:Verdana,Arial,Helvetica,sans-serif; font-size:x-small; font-weight:normal;}
  36.                 .subhead{color:#666666;font-family:Verdana,sans-serif; font-size:x-small; font-weight:bold;}
  37.                 .clearfix{font-family:Verdana,sans-serif; font-size:xx-small; }
  38.                 .date{ font-family:Verdana,Arial,Helvetica,sans-serif ; font-size:xx-small;color:#7A7A7A;}
  39.                 .byline{ font-family:Verdana,Arial,Helvetica,sans-serif ; font-size:xx-small;color:#666666;}
  40.                 .headline{font-family:Verdana,Arial,Helvetica,sans-serif ; font-size:large; font-weight:bold;}
  41.                 '''
  42.  
  43.  
  44.     feeds = [
  45.             ('Top Headlines', 'http://sports.espn.go.com/espn/rss/news'),
  46.              'http://sports.espn.go.com/espn/rss/nfl/news',
  47.              'http://sports.espn.go.com/espn/rss/nba/news',
  48.              'http://sports.espn.go.com/espn/rss/mlb/news',
  49.              'http://sports.espn.go.com/espn/rss/nhl/news',
  50.              'http://sports.espn.go.com/espn/rss/golf/news',
  51.              'http://sports.espn.go.com/espn/rss/rpm/news',
  52.              'http://sports.espn.go.com/espn/rss/tennis/news',
  53.              'http://sports.espn.go.com/espn/rss/boxing/news',
  54.              'http://soccernet.espn.go.com/rss/news',
  55.              'http://sports.espn.go.com/espn/rss/ncb/news',
  56.              'http://sports.espn.go.com/espn/rss/ncf/news',
  57.              'http://sports.espn.go.com/espn/rss/ncaa/news',
  58.              'http://sports.espn.go.com/espn/rss/outdoors/news',
  59.              #'http://sports.espn.go.com/espn/rss/bassmaster/news',
  60.              'http://sports.espn.go.com/espn/rss/oly/news',
  61.              'http://sports.espn.go.com/espn/rss/horse/news'
  62.              ]
  63.  
  64.  
  65.     def preprocess_html(self, soup):
  66.         for div in soup.findAll('div'):
  67.             if div.has_key('style') and 'px' in div['style']:
  68.                 div['style'] = ''
  69.  
  70.         return soup
  71.  
  72.     def postprocess_html(self, soup, first_fetch):
  73.         for div in soup.findAll('div', style=True):
  74.             div['style'] = div['style'].replace('center', 'left')
  75.  
  76.         return soup
  77.  
  78.  
  79.  
  80.     def get_browser(self):
  81.         br = BasicNewsRecipe.get_browser()
  82.         br.set_handle_refresh(False)
  83.         url = ('https://r.espn.go.com/members/v3_1/login')
  84.         raw = br.open(url).read()
  85.         raw = re.sub(r'(?s)<form>.*?id="regsigninbtn".*?</form>', '', raw)
  86.         with TemporaryFile(suffix='.htm') as fname:
  87.             with open(fname, 'wb') as f:
  88.                 f.write(raw)
  89.             br.open_local_file(fname)
  90.  
  91.         br.form = br.forms().next()
  92.         br.form.find_control(name='username', type='text').value = self.username
  93.         br.form['password'] = self.password
  94.         br.submit().read()
  95.         br.open('http://espn.go.com').read()
  96.         br.set_handle_refresh(True)
  97.         return br
  98.  
  99.     def get_article_url(self, article):
  100.         return article.get('guid',  None)
  101.  
  102.     def print_version(self, url):
  103.  
  104.         if 'eticket' in url:
  105.             return url.partition('&')[0].replace('story?', 'print?')
  106.         match = re.search(r'story\?(id=\d+)', url)
  107.  
  108.         if match and 'soccernet'  not in url and 'bassmaster' not in url:
  109.             return 'http://sports.espn.go.com/espn/print?'+match.group(1)+'&type=story'
  110.         else:
  111.             if 'soccernet' in url:
  112.                 match = re.search(r'/id/(\d+)/', url)
  113.                 if match:
  114.                     return \
  115.                         'http://soccernet.espn.go.com/print?id=%s&type=story' % match.group(1)
  116.             #else:
  117.             #    if 'bassmaster' in url:
  118.             #        return url
  119.  
  120.         return None
  121.  
  122.