home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / chicago_tribune.recipe < prev    next >
Text File  |  2011-09-09  |  6KB  |  92 lines

  1. from __future__ import with_statement
  2. __license__ = 'GPL 3'
  3. __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
  4. __docformat__ = 'restructuredtext en'
  5.  
  6. from calibre.web.feeds.news import BasicNewsRecipe
  7.  
  8. class ChicagoTribune(BasicNewsRecipe):
  9.  
  10.     title       = 'Chicago Tribune'
  11.     __author__  = 'Kovid Goyal and Sujata Raman'
  12.     description = 'Politics, local and business news from Chicago'
  13.     language = 'en'
  14.  
  15.     use_embedded_content    = False
  16.     no_stylesheets        = True
  17.     remove_javascript = True
  18.  
  19.     keep_only_tags = [dict(name='div', attrs={'class':["story","entry-asset asset hentry"]}),
  20.                       dict(name='div', attrs={'id':["pagebody","story","maincontentcontainer"]}),
  21.                            ]
  22.     remove_tags_after = [    {'class':['photo_article',]} ]
  23.  
  24.     remove_tags = [{'id':["moduleArticleTools","content-bottom","rail","articleRelates module","toolSet","relatedrailcontent","div-wrapper","beta","atp-comments","footer"]},
  25.                    {'class':["clearfix","relatedTitle","articleRelates module","asset-footer","tools","comments","featurePromo","featurePromo fp-topjobs brownBackground","clearfix fullSpan brownBackground","curvedContent"]},
  26.                    dict(name='font',attrs={'id':["cr-other-headlines"]})]
  27.     extra_css = '''
  28.                     h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
  29.                     h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
  30.                     .byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
  31.                     .date {font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
  32.                     p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
  33.                     .copyright {font-family:Arial,Helvetica,sans-serif;font-size:xx-small;text-align:center}
  34.                     .story{font-family:Arial,Helvetica,sans-serif;font-size:small;}
  35.                     .entry-asset asset hentry{font-family:Arial,Helvetica,sans-serif;font-size:small;}
  36.                     .pagebody{font-family:Arial,Helvetica,sans-serif;font-size:small;}
  37.                     .maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
  38.                     .story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
  39.                     body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
  40.         '''
  41.     feeds = [
  42.              ('Latest news', 'http://feeds.chicagotribune.com/chicagotribune/news/'),
  43.              ('Local news', 'http://feeds.chicagotribune.com/chicagotribune/news/local/'),
  44.              ('Nation/world', 'http://feeds.chicagotribune.com/chicagotribune/news/nationworld/'),
  45.              ('Hot topics', 'http://feeds.chicagotribune.com/chicagotribune/hottopics/'),
  46.              ('Most E-mailed stories', 'http://feeds.chicagotribune.com/chicagotribune/email/'),
  47.              ('Opinion', 'http://feeds.chicagotribune.com/chicagotribune/opinion/'),
  48.              ('Off Topic', 'http://feeds.chicagotribune.com/chicagotribune/offtopic/'),
  49.              #('Politics', 'http://feeds.chicagotribune.com/chicagotribune/politics/'),
  50.              #('Special Reports', 'http://feeds.chicagotribune.com/chicagotribune/special/'),
  51.              #('Religion News', 'http://feeds.chicagotribune.com/chicagotribune/religion/'),
  52.              ('Business news', 'http://feeds.chicagotribune.com/chicagotribune/business/'),
  53.              ('Jobs and Careers', 'http://feeds.chicagotribune.com/chicagotribune/career/'),
  54.              ('Local scene', 'http://feeds.chicagotribune.com/chicagohomes/localscene/'),
  55.              ('Phil Rosenthal', 'http://feeds.chicagotribune.com/chicagotribune/rosenthal/'),
  56.              #('Tech Buzz', 'http://feeds.chicagotribune.com/chicagotribune/techbuzz/'),
  57.              ('Your Money', 'http://feeds.chicagotribune.com/chicagotribune/yourmoney/'),
  58.              ('Jon Hilkevitch - Getting around', 'http://feeds.chicagotribune.com/chicagotribune/gettingaround/'),
  59.              ('Jon Yates - What\'s your problem?', 'http://feeds.chicagotribune.com/chicagotribune/problem/'),
  60.              ('Garisson Keillor', 'http://feeds.chicagotribune.com/chicagotribune/keillor/'),
  61.              ('Marks Jarvis - On Money', 'http://feeds.chicagotribune.com/chicagotribune/marksjarvisonmoney/'),
  62.              ('Sports', 'http://feeds.chicagotribune.com/chicagotribune/sports/'),
  63.              ('Arts and Architecture', 'http://feeds.chicagotribune.com/chicagotribune/arts/'),
  64.              ('Books', 'http://feeds.chicagotribune.com/chicagotribune/books/'),
  65.              #('Magazine', 'http://feeds.chicagotribune.com/chicagotribune/magazine/'),
  66.              ('Movies', 'http://feeds.chicagotribune.com/chicagotribune/movies/'),
  67.              ('Music', 'http://feeds.chicagotribune.com/chicagotribune/music/'),
  68.              ('TV', 'http://feeds.chicagotribune.com/chicagotribune/tv/'),
  69.              ('Hypertext', 'http://feeds.chicagotribune.com/chicagotribune/hypertext/'),
  70.              ('iPhone Blog', 'http://feeds.feedburner.com/redeye/iphoneblog'),
  71.              ('Julie\'s Health Club', 'http://feeds.chicagotribune.com/chicagotribune_julieshealthclub/'),
  72.              ]
  73.  
  74.  
  75.     def get_article_url(self, article):
  76.         print article.get('feedburner_origlink', article.get('guid', article.get('link')))
  77.         return article.get('feedburner_origlink', article.get('guid', article.get('link')))
  78.  
  79.  
  80.     def postprocess_html(self, soup, first_fetch):
  81.         for t in soup.findAll(['table', 'tr', 'td']):
  82.             t.name = 'div'
  83.  
  84.         for tag in soup.findAll('form', dict(attrs={'name':["comments_form"]})):
  85.             tag.extract()
  86.         for tag in soup.findAll('font', dict(attrs={'id':["cr-other-headlines"]})):
  87.             tag.extract()
  88.  
  89.         return soup
  90.  
  91.  
  92.