
#!/usr/bin/env python

__license__   = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
msdn.microsoft.com/en-us/magazine
'''
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup

class MSDNMagazine_en(BasicNewsRecipe):
    title                 = 'MSDN Magazine'
    __author__            = 'Darko Miletic'
    description           = 'The Microsoft Journal for Developers'
    masthead_url          = 'http://i3.msdn.microsoft.com/Platform/MasterPages/MsdnMagazine/smalllogo.png'
    publisher             = 'Microsoft Press'
    category              = 'news, IT, Microsoft, programming, windows'
    oldest_article        = 31
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'utf-8'
    language              = 'en'

    base_url              = 'http://msdn.microsoft.com/en-us/magazine/default.aspx'
    rss_url               = 'http://msdn.microsoft.com/en-us/magazine/rss/default.aspx?z=z&iss=1'

    keep_only_tags = [dict(name='div', attrs={'id':'MainContent'})]

    remove_tags = [
                    dict(name='div', attrs={'class':'DivRatingsOnly'})
                   ,dict(name='div', attrs={'class':'ShareThisButton4'})
                  ]

    def find_articles(self):
        # Fetch the issue's RSS feed and decode XML entities
        idx_contents = self.browser.open(self.rss_url).read()
        idx = BeautifulStoneSoup(idx_contents, convertEntities=BeautifulStoneSoup.XML_ENTITIES)

        # Each <item> in the feed becomes one article entry
        for article in idx.findAll('item'):
            # The description is HTML embedded in the feed text; strip its markup
            desc_html = self.tag_to_string(article.find('description'))
            description = self.tag_to_string(BeautifulSoup(desc_html))

            yield {
                    'title':       self.tag_to_string(article.find('title')),
                    'url':         self.tag_to_string(article.find('link')),
                    'description': description,
                    'date':        self.tag_to_string(article.find('pubdate')),
                  }

    def parse_index(self):
        soup = self.index_to_soup(self.base_url)

        # Find the issue name, e.g. "August 2011"
        issue_name = self.tag_to_string(soup.find('h1'))

        # The cover image uses the issue name as its alt text
        img = soup.find('img', attrs={'alt':issue_name})
        if img is not None:
            self.cover_url = img['src']

        # A single section named after the issue, containing all feed articles
        return [(issue_name, list(self.find_articles()))]