home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / infomotori.recipe < prev    next >
Text File  |  2011-09-09  |  3KB  |  58 lines

  1. #!/usr/bin/env  python
  2. __license__   = 'GPL v3'
  3. __author__    = 'Gabriele Marini, based on Darko Miletic'
  4. __copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
  5. description   = 'On Line Motor News - 01-05-2010'
  6.  
  7. '''
  8. http://www.infomotori.it/
  9. '''
  10. from calibre.ebooks.BeautifulSoup import BeautifulSoup
  11. from calibre.web.feeds.news import BasicNewsRecipe
  12.  
  13. class infomotori(BasicNewsRecipe):
  14.     author         = 'Gabriele Marini'
  15.     title          = u'Infomotori'
  16.     cover          = 'http://www.infomotori.com/content/files/anniversario_01.gif'
  17.     oldest_article        = 31
  18.     max_articles_per_feed = 100
  19.     recursion             = 100
  20.     use_embedded_content  = False
  21.  
  22.     language       = 'it'
  23.     use_embedded_content  = False
  24.     remove_javascript     = True
  25.     no_stylesheets        = True
  26.     language       = 'it'
  27.     timefmt        = '[%a, %d %b, %Y]'
  28.  
  29.  
  30.     def print_version(self, url):
  31.         raw = self.browser.open(url).read()
  32.         soup = BeautifulSoup(raw.decode('utf8', 'replace'))
  33.         print_link = soup.find('a', {'class':'printarticle'})
  34.  
  35.         '''if print_link is None:
  36.  
  37.                keep_only_tags     = [  dict(name='div', attrs={'class':['article main-column-article photogallery-column','category-header','article-body']})
  38.                                     ]
  39.                remove_tags        = [ dict(name='div', attrs={'class':['thumbnails-article','infoflash-footer','imushortarticle']}),
  40.                                       dict(name='div', attrs={'id':['linkinviastampa','linkspazioblu','altriarticoli','articoliconcorrenti','articolicorrelati','boxbrand']}),
  41.                                       dict(name='table', attrs={'class':'article-page'})
  42.                                     ]
  43.  
  44.                remove_tags_after  = [ dict(name='div', attrs={'id':'articlebody'})
  45.                                     ]
  46.                return url
  47.            '''
  48.         return print_link['href']
  49.  
  50.     feeds          =     [(u'Ultime Novit\xe0', u'http://feeds.infomotori.com/ultimenovita'),
  51.                           (u'Auto: Ultime Novit\xe0 ', u'http://feeds.infomotori.com/autonovita'),
  52.                           (u'Moto: Ultime Novit\xe0 Moto', u'http://feeds.infomotori.com/motonovita'),
  53.                           (u'Notizie Flash', u'http://feeds.infomotori.com/infoflashmotori'),
  54.                           (u'Veicoli Ecologici e Mobilit\xe0 Sostenibile', u'http://feeds.infomotori.com/ecomotori'),
  55.                           (u'4x4 Fuoristrada, Crossover e Suv', u'http://feeds.infomotori.com/fuoristrada'),
  56.                           (u'Shopping Motori', u'http://feeds.infomotori.com/shoppingmotori')
  57.                          ]
  58.