home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / natgeo.recipe < prev    next >
Text File  |  2011-09-09  |  3KB  |  72 lines

  1. #!/usr/bin/env  python
  2. # -*- coding: utf-8 -*-
  3. __license__   = 'GPL v3'
  4. __copyright__ = '2011, gagsays <gagsays at gmail dot com>'
  5. '''
  6. nationalgeographic.com
  7. '''
  8. from calibre.web.feeds.news import BasicNewsRecipe
  9. class NatGeo(BasicNewsRecipe):
  10.     title          = u'National Geographic'
  11.     description = 'Daily news articles from The National Geographic'
  12.     language = 'en'
  13.     oldest_article = 20
  14.     max_articles_per_feed = 25
  15.     encoding              = 'utf8'
  16.     publisher              = 'nationalgeographic.com'
  17.     category               = 'science, nat geo'
  18.     __author__           = 'gagsays'
  19.     masthead_url        = 'http://s.ngeo.com/wpf/sites/themes/global/i/presentation/ng_logo_small.png'
  20.     description           = 'Inspiring people to care about the planet since 1888'
  21.     timefmt = ' [%a, %d %b, %Y]'
  22.     no_stylesheets        = True
  23.     use_embedded_content  = False
  24.  
  25.     extra_css = '''
  26.                 body {color: #000000;font-size: medium;}
  27.                 h1 {color: #222222; font-size: large; font-weight:lighter; text-decoration:none; text-align: center;font-family:Georgia,Times New Roman,Times,serif;}
  28.           h2 {color: #454545; font-size: small; font-weight:lighter; text-decoration:none; text-align: justify; font-style:italic;font-family :Georgia,Times New Roman,Times,serif;}
  29.                 h3 {color: #555555; font-size: small; font-style:italic; margin-top: 10px;}
  30.                 img{margin-bottom: 0.25em;display:block;margin-left: auto;margin-right: auto;}
  31.                 a:link,a,.a,href {text-decoration: none;color: #000000;}
  32.                 .caption{color: #000000;font-size: xx-small;text-align: justify;font-weight:normal;}
  33.                 .credit{color: #555555;font-size: xx-small;text-align: left;font-weight:lighter;}
  34.                 p.author,p.publication{color: #000000;font-size: xx-small;text-align: left;display:inline;}
  35.                 p.publication_time{color: #000000;font-size: xx-small;text-align: right;text-decoration: underline;}
  36.                 p {margin-bottom: 0;}
  37.                 p + p {text-indent: 1.5em;margin-top: 0;}
  38.                 .hidden{display:none;}
  39.                 #page_head{text-transform:uppercase;}
  40.                '''
  41.  
  42.     def parse_feeds (self):
  43.       feeds = BasicNewsRecipe.parse_feeds(self)
  44.       for feed in feeds:
  45.         for article in feed.articles[:]:
  46.           if 'Presented' in article.title or 'Pictures' in article.title:
  47.             feed.articles.remove(article)
  48.       return feeds
  49.  
  50.     def preprocess_html(self, soup):
  51.         for alink in soup.findAll('a'):
  52.             if alink.string is not None:
  53.                tstr = alink.string
  54.                alink.replaceWith(tstr)
  55.         return soup
  56.  
  57.     remove_tags_before = dict(id='page_head')
  58.     keep_only_tags = [
  59.          dict(name='div',attrs={'id':['page_head','content_mainA']})
  60.     ]
  61.     remove_tags_after = [
  62.          dict(name='div',attrs={'class':['article_text','promo_collection']})
  63.     ]
  64.     remove_tags    = [
  65.                                dict(name='div', attrs={'class':['aside','primary full_width']})
  66.                                ,dict(name='div', attrs={'id':['header_search','navigation_mainB_wrap']})
  67.                                 ]
  68.     feeds = [
  69.                     (u'Daily News', u'http://feeds.nationalgeographic.com/ng/News/News_Main')
  70.                     ]
  71.  
  72.