
#!/usr/bin/env python
__license__ = 'GPL v3'
'''
'''
from calibre.web.feeds.recipes import BasicNewsRecipe


class PeopleMag(BasicNewsRecipe):

    title       = 'People/US Magazine Mashup'
    __author__  = 'BrianG'
    language = 'en'
    description = 'Headlines from People and US Magazine'
    no_stylesheets        = True
    use_embedded_content  = False
    oldest_article = 2
    max_articles_per_feed = 50

    extra_css = '''
        h1{font-family:verdana,arial,helvetica,sans-serif; font-size: large;}
        h2{font-family:verdana,arial,helvetica,sans-serif; font-size: small;}
        .body-content{font-family:verdana,arial,helvetica,sans-serif; font-size: small;}
        .byline {font-size: small; color: #666666; font-style:italic;}
        .lastline {font-size: small; color: #666666; font-style:italic;}
        .contact {font-size: small; color: #666666;}
        .contact p {font-size: small; color: #666666;}
        .photoCaption {font-family:verdana,arial,helvetica,sans-serif; font-size:x-small;}
        .photoCredit {font-family:verdana,arial,helvetica,sans-serif; font-size:x-small; color:#666666;}
        .article_timestamp {font-size:x-small; color:#666666;}
        a {font-family:verdana,arial,helvetica,sans-serif; font-size: x-small;}
        '''

    keep_only_tags = [
        dict(name='div', attrs={'class': 'panel_news_article_main'}),
        dict(name='div', attrs={'class': 'article_content'}),
        dict(name='div', attrs={'class': 'headline'}),
        dict(name='div', attrs={'class': 'post'}),
        dict(name='div', attrs={'class': 'packageheadlines'}),
        dict(name='div', attrs={'class': 'snap_preview'}),
        dict(name='div', attrs={'id': 'articlebody'})
    ]

    remove_tags = [
        dict(name='div', attrs={'class': 'share_comments'}),
        dict(name='p', attrs={'class': 'twitter_facebook'}),
        dict(name='div', attrs={'class': 'share_comments_bottom'}),
        dict(name='h2', attrs={'id': 'related_content'}),
        dict(name='div', attrs={'class': 'next_article'}),
        dict(name='div', attrs={'class': 'prev_article'}),
        dict(name='ul', attrs={'id': 'sharebar'}),
        dict(name='div', attrs={'class': 'sharelinkcont'}),
        dict(name='div', attrs={'class': 'categories'}),
        dict(name='ul', attrs={'class': 'categories'}),
        dict(name='div', attrs={'class': 'related_content'}),
        dict(name='div', attrs={'id': 'promo'}),
        dict(name='div', attrs={'class': 'linksWrapper'}),
        dict(name='p', attrs={'class': 'tag tvnews'}),
        dict(name='p', attrs={'class': 'tag movienews'}),
        dict(name='p', attrs={'class': 'tag musicnews'}),
        dict(name='p', attrs={'class': 'tag couples'}),
        dict(name='p', attrs={'class': 'tag gooddeeds'}),
        dict(name='p', attrs={'class': 'tag weddings'}),
        dict(name='p', attrs={'class': 'tag health'})
    ]

    feeds = [
        ('PEOPLE Headlines', 'http://feeds.people.com/people/headlines'),
        ('US Headlines', 'http://www.usmagazine.com/celebrity_news/rss')
    ]

    def get_article_url(self, article):
        ans = article.link

        try:
            # Multi-page stories expose a 'View All' link; appending
            # ?viewAll=y requests the single-page version of the article.
            self.log('Looking for full story link in', ans)
            soup = self.index_to_soup(ans)
            x = soup.find(text="View All")

            if x is not None:
                ans = ans + '?viewAll=y'
                self.log('Found full story link', ans)
        except Exception:
            # If the article page cannot be fetched, fall back to the feed link.
            pass
        return ans

    def postprocess_html(self, soup, first):
        # Remove leftover Q&A title containers and stray <br> tags.
        for tag in soup.findAll(name='div', attrs={'class': "container_ate_qandatitle"}):
            tag.extract()
        for tag in soup.findAll(name='br'):
            tag.extract()

        return soup
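
# To spot-check this recipe locally (assuming a calibre install that provides
# the ebook-convert command; the output file name below is arbitrary):
#
#     ebook-convert people_us_mashup.recipe people_us_mashup.epub --test -vv
#
# With --test, calibre downloads only a couple of articles from at most two
# feeds, which is enough to verify that the keep_only_tags / remove_tags
# selectors above still match the live pages.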