home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / calcalist.recipe < prev    next >
Text File  |  2011-09-09  |  3KB  |  44 lines

  1. from calibre.web.feeds.news import BasicNewsRecipe
  2. from calibre.ebooks.BeautifulSoup import re
  3.  
  4. class AdvancedUserRecipe1283848012(BasicNewsRecipe):
  5.     description   = 'This is a recipe of Calcalist.co.il. The recipe downloads the article page to not hurt the sites advertising income.'
  6.     cover_url      = 'http://ftp5.bizportal.co.il/web/giflib/news/calcalist.JPG'
  7.     title          = u'Calcalist'
  8.     language              = 'he'
  9.     __author__ = 'marbs'
  10.     extra_css='img {max-width:100%;} body{direction: rtl;},title{direction: rtl; } ,article_description{direction: rtl; }, a.article{direction: rtl; } ,calibre_feed_description{direction: rtl; }'
  11.     simultaneous_downloads = 5
  12.     remove_javascript     = True
  13.     timefmt        = '[%a, %d %b, %Y]'
  14.     oldest_article = 1
  15.     max_articles_per_feed = 100
  16.     remove_attributes = ['width']
  17.     simultaneous_downloads = 5
  18.     keep_only_tags =dict(name='div', attrs={'id':'articleContainer'})
  19.     remove_tags = [dict(name='p', attrs={'text':[' ']})]
  20.     max_articles_per_feed = 100
  21.     preprocess_regexps = [
  22.         (re.compile(r'<p> </p>', re.DOTALL|re.IGNORECASE), lambda match: '')
  23.         ]
  24.  
  25.  
  26.     feeds          = [(u'\u05d3\u05e3 \u05d4\u05d1\u05d9\u05ea', u'http://www.calcalist.co.il/integration/StoryRss8.xml'),
  27.                            (u'24/7', u'http://www.calcalist.co.il/integration/StoryRss3674.xml'),
  28.                            (u'\u05d1\u05d0\u05d6\u05d6', u'http://www.calcalist.co.il/integration/StoryRss3674.xml'),
  29.                            (u'\u05de\u05d1\u05d6\u05e7\u05d9\u05dd', u'http://www.calcalist.co.il/integration/StoryRss184.xml'),
  30.                            (u'\u05d4\u05e9\u05d5\u05e7', u'http://www.calcalist.co.il/integration/StoryRss2.xml'),
  31.                            (u'\u05d1\u05d0\u05e8\u05e5', u'http://www.calcalist.co.il/integration/StoryRss14.xml'),
  32.                            (u'\u05d4\u05db\u05e1\u05e3', u'http://www.calcalist.co.il/integration/StoryRss9.xml'),
  33.                            (u'\u05e0\u05d3\u05dc"\u05df', u'http://www.calcalist.co.il/integration/StoryRss7.xml'),
  34.                            (u'\u05e2\u05d5\u05dc\u05dd', u'http://www.calcalist.co.il/integration/StoryRss13.xml'),
  35.                            (u'\u05e4\u05e8\u05e1\u05d5\u05dd \u05d5\u05e9\u05d9\u05d5\u05d5\u05e7', u'http://www.calcalist.co.il/integration/StoryRss5.xml'),
  36.                            (u'\u05e4\u05e0\u05d0\u05d9', u'http://www.calcalist.co.il/integration/StoryRss3.xml'),
  37.                            (u'\u05d8\u05db\u05e0\u05d5\u05dc\u05d5\u05d2\u05d9', u'http://www.calcalist.co.il/integration/StoryRss4.xml'),
  38.                            (u'\u05e2\u05e1\u05e7\u05d9 \u05e1\u05e4\u05d5\u05e8\u05d8', u'http://www.calcalist.co.il/integration/StoryRss18.xml')]
  39.  
  40.     def print_version(self, url):
  41.         split1 = url.split("-")
  42.         print_url = 'http://www.calcalist.co.il/Ext/Comp/ArticleLayout/CdaArticlePrintPreview/1,2506,L-' + split1[1]
  43.         return print_url
  44.