home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / journalgazette.recipe < prev    next >
Text File  |  2011-09-09  |  3KB  |  66 lines

  1. #!/usr/bin/env  python
  2. __license__   = 'GPL v3'
  3. __author__    = 'somedayson & TonytheBookworm, revised by Cynthia Clavey'
  4. __copyright__ = '2010, Cynthia Clavey cynvision@yahoo.com'
  5. __version__   = '1.02'
  6. __date__      = '05, september 2010'
  7. __docformat__ = 'restructuredtext en'
  8. from calibre.web.feeds.recipes import BasicNewsRecipe
  9.  
  10. class AdvancedUserRecipe1283666183(BasicNewsRecipe):
  11.     title          = u'Journal Gazette Ft. Wayne IN'
  12.     __author__     = 'cynvision'
  13.     oldest_article = 1
  14.     language = 'en'
  15.     max_articles_per_feed = 8
  16.     no_stylesheets = True
  17.     remove_javascript     = True
  18.     use_embedded_content  = False
  19.     keep_only_tags = [dict(name='div', attrs={'id':'mainContent'})]
  20.     extra_css = '#copyinfo { font-size: 6 ;} \n #photocredit { font-size: 6 ;} \n  .pubinfo { font-size: 6 ;}'
  21.     masthead_url          = 'http://www.journalgazette.net/img/icons/jgmini.gif'
  22. #    cover_url = 'http://www.journalgazette.net/img/icons/jgmini.gif'
  23.     encoding = 'cp1252'
  24.  
  25.     feeds = [(u'Opinion', u'http://journalgazette.net/apps/pbcs.dll/section?Category=EDIT&template=blogrss&mime=xml'),
  26.              (u'Local News',u'http://journalgazette.net/apps/pbcs.dll/section?Category=LOCAL&template=blogrss&mime=xml') ,
  27.              (u'Sports',u'http://journalgazette.net/apps/pbcs.dll/section?Category=SPORTS&template=blogrss&mime=xml' ),
  28.              (u'Features',u'http://journalgazette.net/apps/pbcs.dll/section?Category=FEAT&template=blogrss&mime=xml'),
  29.              (u'Business',u'http://journalgazette.net/apps/pbcs.dll/section?Category=BIZ&template=blogrss&mime=xml'),
  30.              (u'Ice Chips',u'http://journalgazette.net/apps/pbcs.dll/section?Category=BLOGS11&template=blogrss&mime=xml '),
  31.              (u'Entertainment',u'http://journalgazette.net/apps/pbcs.dll/section?Category=ENT&template=blogrss&mime=xml'),
  32.              (u'Food',u'http://journalgazette.net/apps/pbcs.dll/section?Category=FOOD&template=blogrss&mime=xml')
  33.             ]
  34.  
  35.  
  36.  
  37.  
  38.     def print_version(self, url):
  39.         split1 = url.split("/")
  40.         #print 'THE SPLIT IS: ', split1
  41.         #url1 = split1[0]
  42.         #url2 = split1[1]
  43.         url3 = split1[2]
  44.         #url4 = split1[3]
  45.         url5 = split1[4]
  46.         url6 = split1[5]
  47.         url7 = split1[6]
  48.         #url8 = split1[7]
  49.  
  50.   #need to convert to print_version
  51.   #originalversion is : http://www.journalgazette.net/article/20100905/EDIT10/309059959/1021/EDIT
  52.   #printversion should be: http://www.journalgazette.net/apps/pbcs.dll/article?AID=/20100905/EDIT10/309059959/-1/EDIT01&template=printart
  53.   #results of the split
  54.   #THE SPLIT IS:  [u'http:', u'', u'www.journalgazette.net', u'article', u'20100905', u'EDIT10', u'309059959', u'1021', u'EDIT']
  55.  
  56.  
  57.  
  58.         print_url = 'http://' + url3 + '/apps/pbcs.dll/article?AID=/' + url5 + '/' + url6 + '/' + url7 + '/-1/EDIT01&template=printart'
  59.         #print 'THIS URL WILL PRINT: ', print_url # this is a test string to see what the url is it will return
  60.         return print_url
  61.  
  62.     def preprocess_html(self, soup):
  63.         for item in soup.findAll(style=True):
  64.             del item['style']
  65.         return soup
  66.