home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / soldiers.recipe < prev    next >
Text File  |  2011-09-09  |  2KB  |  52 lines

  1.  
  2. __license__   = 'GPL v3'
  3. __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
  4. '''
  5. www.army.mil/soldiers/
  6. '''
  7.  
  8. from calibre.web.feeds.news import BasicNewsRecipe
  9.  
  10. class Soldiers(BasicNewsRecipe):
  11.     title                  = 'Soldiers'
  12.     __author__             = 'Darko Miletic'
  13.     description            = 'The Official U.S. Army Magazine'
  14.     oldest_article         = 30
  15.     max_articles_per_feed  = 100
  16.     no_stylesheets         = True
  17.     use_embedded_content   = False
  18.     simultaneous_downloads = 1
  19.     delay                  = 4
  20.     max_connections        = 1    
  21.     encoding               = 'utf-8'
  22.     publisher              = 'U.S. Army'
  23.     category               = 'news, politics, war, weapons'    
  24.     language               = 'en'
  25.     INDEX                  = 'http://www.army.mil/soldiers/'
  26.  
  27.     conversion_options = {
  28.                           'comment'          : description
  29.                         , 'tags'             : category
  30.                         , 'publisher'        : publisher
  31.                         , 'language'         : language
  32.                         }
  33.  
  34.     keep_only_tags = [dict(name='div', attrs={'id':['storyHeader','textArea']})]
  35.                      
  36.     remove_tags = [
  37.                      dict(name='div', attrs={'id':['addThis','comment','articleFooter']})
  38.                     ,dict(name=['object','link'])
  39.                   ]
  40.                             
  41.     feeds = [(u'Frontpage', u'http://www.army.mil/rss/feeds/soldiersfrontpage.xml' )]
  42.  
  43.  
  44.     def get_cover_url(self):
  45.         cover_url = None
  46.         soup = self.index_to_soup(self.INDEX)
  47.         cover_item = soup.find('img',attrs={'alt':'Current Magazine Cover'})
  48.         if cover_item:
  49.            cover_url = cover_item['src']
  50.         return cover_url
  51.  
  52.