home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / seattle_times.recipe < prev    next >
Text File  |  2011-09-09  |  5KB  |  93 lines

  1. #!/usr/bin/env  python
  2.  
  3. __license__   = 'GPL v3'
  4. __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
  5. '''
  6. seattletimes.nwsource.com
  7. '''
  8.  
  9. from calibre.web.feeds.news import BasicNewsRecipe
  10.  
  11. class SeattleTimes(BasicNewsRecipe):
  12.     title                 = 'The Seattle Times'
  13.     __author__            = 'Darko Miletic'
  14.     description           = 'News from Seattle and USA'
  15.     publisher             = 'The Seattle Times'
  16.     category              = 'news, politics, USA'
  17.     oldest_article        = 2
  18.     max_articles_per_feed = 100
  19.     no_stylesheets        = True
  20.     use_embedded_content  = False
  21.     encoding              = 'cp1252'
  22.     language = 'en'
  23.  
  24.     feeds              = [
  25.                           (u'Top Stories',
  26.                               u'http://seattletimes.nwsource.com/rss/home.xml'),
  27.                           #(u'Articles', u'http://seattletimes.nwsource.com/rss/seattletimes.xml')
  28.                           (u'Business & Technology',
  29.                               u'http://seattletimes.nwsource.com/rss/businesstechnology.xml'),
  30.                           (u'Personal Technology',
  31.                               u'http://seattletimes.nwsource.com/rss/personaltechnology.xml'),
  32.                           (u'Entertainment & the Arts',
  33.                               u'http://seattletimes.nwsource.com/rss/artsentertainment.xml'),
  34.                           (u'Health',
  35.                               u'http://seattletimes.nwsource.com/rss/health.xml'),
  36.                           (u'Living',
  37.                               u'http://seattletimes.nwsource.com/rss/living.xml'),
  38.                           (u'Local News',
  39.                               u'http://seattletimes.nwsource.com/rss/localnews.xml'),
  40.                           (u'Nation & World',
  41.                               u'http://seattletimes.nwsource.com/rss/nationworld.xml'),
  42.                           (u'Opinion',
  43.                               u'http://seattletimes.nwsource.com/rss/opinion.xml'),
  44.                           (u'Politics',
  45.                               u'http://seattletimes.nwsource.com/rss/politics.xml'),
  46.                           (u'Sports',
  47.                               u'http://seattletimes.nwsource.com/rss/sports.xml'),
  48.                           (u'Nicole Brodeur',
  49.                               u'http://seattletimes.nwsource.com/rss/nicolebrodeur.xml'),
  50.                           (u'Danny Westneat',
  51.                               u'http://seattletimes.nwsource.com/rss/dannywestneat.xml'),
  52.                           (u'Jerry Large',
  53.                               u'http://seattletimes.nwsource.com/rss/jerrylarge.xml'),
  54.                           (u'Ron Judd',
  55.                               u'http://seattletimes.nwsource.com/rss/ronjudd.xml'),
  56.                           (u'Education',
  57.                               u'http://seattletimes.nwsource.com/rss/education.xml'),
  58.                           (u'Letters to the Editor',
  59.                               u'http://seattletimes.nwsource.com/rss/northwestvoices.xml'),
  60.                           (u'Travel',
  61.                               u'http://seattletimes.nwsource.com/rss/travel.xml'),
  62.                           (u'Outdoors',
  63.                               u'http://seattletimes.nwsource.com/rss/outdoors.xml'),
  64.                           (u'Steve Kelley',
  65.                               u'http://seattletimes.nwsource.com/rss/stevekelley.xml'),
  66.                           (u'Jerry Brewer',
  67.                               u'http://seattletimes.nwsource.com/rss/jerrybrewer.xml'),
  68.                           (u'Most Read Articles',
  69.                               u'http://seattletimes.nwsource.com/rss/mostreadarticles.xml'),
  70.                          ]
  71.  
  72.     keep_only_tags = [dict(id='content')]
  73.     remove_tags        = [
  74.                              dict(name=['object','link','script']),
  75.                             {'class':['permission', 'note', 'bottomtools',
  76.                                 'homedelivery']},
  77.                             dict(id=["rightcolumn", 'footer', 'adbottom']),
  78.                          ]
  79.  
  80.     def print_version(self, url):
  81.         return url
  82.         start_url, sep, rest_url = url.rpartition('_')
  83.         rurl, rsep, article_id = start_url.rpartition('/')
  84.         return u'http://seattletimes.nwsource.com/cgi-bin/PrintStory.pl?document_id=' + article_id
  85.  
  86.     def preprocess_html(self, soup):
  87.         mtag = '<meta http-equiv="Content-Language" content="en-US"/>'
  88.         soup.head.insert(0,mtag)
  89.         for item in soup.findAll(style=True):
  90.             del item['style']
  91.         return soup
  92.  
  93.