home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / statesman.recipe < prev    next >
Text File  |  2011-09-09  |  2KB  |  41 lines

  1. from calibre.web.feeds.news import BasicNewsRecipe
  2.  
  3. class AdvancedUserRecipe1278049615(BasicNewsRecipe):
  4.     title          = u'Statesman'
  5.     pubisher  = 'http://www.statesman.com/'
  6.     description           = 'Austin Texas Daily Newspaper'
  7.     category              = 'News, Austin, Texas'
  8.     __author__            = 'rty'
  9.     oldest_article = 3
  10.  
  11.     max_articles_per_feed = 100
  12.  
  13.     feeds          = [(u'News',
  14.         u'http://www.statesman.com/section-rss.do?source=news&includeSubSections=true'),
  15.     (u'Local', u'http://www.statesman.com/section-rss.do?source=local&includeSubSections=true'),
  16.     (u'Business', u'http://www.statesman.com/section-rss.do?source=business&includeSubSections=true'),
  17.     (u'Life', u'http://www.statesman.com/section-rss.do?source=life&includesubsection=true'),
  18.     (u'Editorial', u'http://www.statesman.com/section-rss.do?source=opinion&includesubsections=true'),
  19.     (u'Sports', u'http://www.statesman.com/section-rss.do?source=sports&includeSubSections=true')
  20.     ]
  21.     masthead_url = "http://www.statesman.com/images/cmg-logo.gif"
  22.     #temp_files = []
  23.     #articles_are_obfuscated = True
  24.  
  25.     remove_javascript = True
  26.     use_embedded_content   = False
  27.     no_stylesheets = True
  28.     language = 'en'
  29.     encoding               = 'utf-8'
  30.     conversion_options = {'linearize_tables':True}
  31.     remove_tags = [
  32.                     dict(name='div', attrs={'id':'cxArticleOptions'}),
  33.                     {'class':['perma', 'comments', 'trail', 'share-buttons',
  34.                         'toggle_show_on']},
  35.                         ]
  36.     keep_only_tags = [
  37.      dict(name='div', attrs={'class':'cxArticleHeader'}),
  38.                      dict(name='div', attrs={'id':['cxArticleBodyText',
  39.                          'content']}),
  40.                                ]
  41.