home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / ifzm.recipe < prev    next >
Text File  |  2011-09-09  |  2KB  |  51 lines

  1. from calibre.web.feeds.news import BasicNewsRecipe
  2.  
  3. class AdvancedUserRecipe1277305250(BasicNewsRecipe):
  4.     title          = u'infzm - China Southern Weekly'
  5.     oldest_article = 14
  6.     max_articles_per_feed = 100
  7.  
  8.     feeds          = [(u'\u5357\u65b9\u5468\u672b-\u70ed\u70b9\u65b0\u95fb', u'http://www.infzm.com/rss/home/rss2.0.xml'),
  9.     (u'\u5357\u65b9\u5468\u672b-\u7ecf\u6d4e\u65b0\u95fb', u'http://www.infzm.com/rss/economic.xml'),
  10.     (u'\u5357\u65b9\u5468\u672b-\u6587\u5316\u65b0\u95fb', u'http://www.infzm.com/rss/culture.xml'),
  11.     (u'\u5357\u65b9\u5468\u672b-\u751f\u6d3b\u65f6\u5c1a', u'http://www.infzm.com/rss/lifestyle.xml'),
  12.     (u'\u5357\u65b9\u5468\u672b-\u89c2\u70b9', u'http://www.infzm.com/rss/opinion.xml')
  13.     ]
  14.     __author__            = 'rty'
  15.     __version__            = '1.0'
  16.     language = 'zh'
  17.     pubisher  = 'http://www.infzm.com'
  18.     description           = 'Chinese Weekly Tabloid'
  19.     category              = 'News, China'
  20.     remove_javascript = True
  21.     use_embedded_content   = False
  22.     no_stylesheets = True
  23.     #encoding               = 'GB2312'
  24.     encoding               = 'UTF-8'
  25.     conversion_options = {'linearize_tables':True}
  26.     masthead_url = 'http://i50.tinypic.com/2qmfb7l.jpg'
  27.  
  28.     extra_css = '''
  29.              @font-face { font-family: "DroidFont", serif, sans-serif;  src: url(res:///system/fonts/DroidSansFallback.ttf); }\n
  30.              body {
  31.                   margin-right: 8pt;
  32.                   font-family: 'DroidFont', serif;}
  33.              .detailContent {font-family: 'DroidFont', serif, sans-serif}
  34.             '''
  35.  
  36.     keep_only_tags = [
  37.                               dict(name='div', attrs={'id':'detailContent'}),
  38.                                ]
  39.     remove_tags = [
  40.                     dict(name='div', attrs={'id':['detailTools', 'detailSideL', 'pageNum']}),
  41.                          ]
  42.     remove_tags_after = [
  43.                   dict(name='div', attrs={'id':'pageNum'}),
  44.                          ]
  45.     def preprocess_html(self, soup):
  46.         for item in soup.findAll(color=True):
  47.             del item['font']
  48.         for item in soup.findAll(style=True):
  49.             del item['style']
  50.         return soup
  51.