Chip 2011 November

home *** CD-ROM | disk | FTP | other *** search

/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / cbn.recipe < prev next >

Wrap

Text File | 2011-09-09 | 3KB | 74 lines

from calibre.web.feeds.news import BasicNewsRecipe


class CBN(BasicNewsRecipe):
    """Calibre news recipe for CBN News (The Christian Broadcasting Network).

    Articles come from the RSS feeds listed in ``feeds``. From each article
    page only the title, author, date and text elements are kept (see
    ``keep_only_tags``); the article image container is removed.
    """

    title = u'CBN News'
    __author__ = 'Roger'

    # TODO: I just noticed this is downloading 25+ articles, while
    # the online site is only publishing at most 7 articles daily.
    # So, somehow this needs to be fixed so it only downloads max 7 articles.
    # NOTE(review): oldest_article is an age limit in days, so a 7-day window
    # across the seven feeds below may account for the count -- confirm
    # before changing either value.
    oldest_article = 7
    max_articles_per_feed = 100

    description = 'The Christian Broadcasting Network'
    publisher = 'http://www.cbn.com/'
    category = 'news, religion, spiritual, christian'
    language = 'en'

    # Make article titles, author and date bold, italic or small font.
    # TODO: Could use a smaller title text
    # TODO: Italicize Author and Publisher?
    #
    # Style sheets used by the site:
    #   http://www.cbn.com/App_Themes/Common/base.css,
    #   http://www.cbn.com/App_Themes/CBNNews/article.css",
    #   ... and many more style sheets.
    #
    # Disabled styling, kept for reference until the TODOs above are settled:
    # extra_css = '''
    #     .story_item_headline { font-size: medium; font-weight: bold; }
    #     .story_item_author { font-size: small; font-style:italic; }
    #     .signature_line { font-size: small; }
    # '''

    remove_javascript = True
    use_embedded_content = False
    no_stylesheets = True
    # NOTE(review): presumably the charset the site serves -- confirm.
    encoding = 'iso-8859-1'
    conversion_options = {'linearize_tables': True}

    # TODO: No masthead_url for CBN, using one I grepped from a news article
    # (There's a better/higher contrast blue on white background image, but
    # can't get it or it's too big -- embedded into a larger jpeg?)
    masthead_url = 'http://www.cbn.com/templates/images/cbn_com_logo.jpg'

    keep_only_tags = [
        dict(name='h1', attrs={'id': 'articleTitle'}),
        dict(name='div', attrs={'class': 'articleAuthor'}),
        dict(name='div', attrs={'class': 'articleDate'}),
        dict(name='div', attrs={'class': 'articleText'}),
    ]

    remove_tags = [
        # The article image is usually Adobe Flash Player Image
        # The snapshot .jpg image files of the video are found
        # within a URL folder named "PageFiles_Files"
        # Filter this for now.
        # (Majority of images seem to be Adobe Flash.)
        dict(name='div', attrs={'class': 'articleImage'}),
    ]

    # Comment-out or uncomment any of the following RSS feeds according to
    # your liking.
    # A full list can be found here: http://www.cbn.com/rss.aspx
    feeds = [
        (u'World', u'http://www.cbn.com/cbnnews/world/feed/'),
        (u'US', u'http://www.cbn.com/cbnnews/us/feed/'),
        (u'Inside Israel', u'http://www.cbn.com/cbnnews/insideisrael/feed/'),
        (u'Politics', u'http://www.cbn.com/cbnnews/politics/feed/'),
        (u'Christian World News', u'http://www.cbn.com/cbnnews/shows/cwn/feed/'),
        (u'Health and Science', u'http://www.cbn.com/cbnnews/healthscience/feed/'),
        (u'Finance', u'http://www.cbn.com/cbnnews/finance/feed/'),
    ]