home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / kamerabild.recipe < prev    next >
Text File  |  2011-09-09  |  2KB  |  47 lines

  1.  
  2. __license__   = 'GPL v3'
  3. __copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
  4. '''
  5. www.kamerabild.se
  6. '''
  7.  
  8. from calibre.web.feeds.news import BasicNewsRecipe
  9.  
  10. class Kamerabild(BasicNewsRecipe):
  11.     title                 = 'Kamera & Bild'
  12.     __author__            = 'Darko Miletic'
  13.     description           = 'Photo News from Sweden'
  14.     publisher             = 'politiken.dk'
  15.     category              = 'news, photograph, Sweden'
  16.     oldest_article        = 7
  17.     max_articles_per_feed = 100
  18.     no_stylesheets        = True
  19.     remove_empty_feeds    = True
  20.     use_embedded_content  = False
  21.     encoding              = 'utf8'
  22.     language              = 'sv'
  23.  
  24.     extra_css = ' body{font-family: Verdana,Arial,Helvetica,sans-serif } .title{font-weight: bold} .pricerunnerAdContainer{border-bottom: 1px solid; border-top: 1px solid; margin-top: 0.5em; margin-bottom: 0.5em} .elementTeaserKicker{font-weight: bold; color: #AE0A10} '
  25.  
  26.     conversion_options = {
  27.                           'comment'  : description
  28.                         , 'tags'     : category
  29.                         , 'publisher': publisher
  30.                         , 'language' : language
  31.                         }
  32.  
  33.     feeds              = [(u'Articles', u'http://www.kamerabild.se/cmlink/Nyheter-fran-KAMERA-BILD-1.43315.xml')]
  34.     keep_only_tags     = [dict(name='div',attrs={'class':'container'})]
  35.     remove_tags_after = dict(name='div',attrs={'class':'editor'})
  36.     remove_tags        = [
  37.                             dict(name=['object','link','iframe'])
  38.                            ,dict(name='div',attrs={'class':['pricerunner_head','sideBar','img']})
  39.                          ]
  40.  
  41.     def preprocess_html(self, soup):
  42.         for item in soup.findAll(style=True):
  43.             del item['style']
  44.         return self.adeify_images(soup)
  45.  
  46.  
  47.