home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / todays_zaman.recipe < prev    next >
Text File  |  2011-09-09  |  4KB  |  54 lines

  1. from calibre.web.feeds.news import BasicNewsRecipe
  2.  
  3. class TodaysZaman_en(BasicNewsRecipe):
  4.     title          = u'Todays Zaman'
  5.     __author__            = u'thomass'
  6.     description            = 'a Turkey based daily for national and international news in the fields of business, diplomacy, politics, culture, arts, sports and economics, in addition to commentaries, specials and features'
  7.     oldest_article         = 2
  8.     max_articles_per_feed  =100
  9.     no_stylesheets         = True
  10.     #delay                  = 1
  11.     #use_embedded_content   = False
  12.     encoding               = 'utf-8'
  13.     #publisher              = '  '
  14.     category               = 'news, haberler,TR,gazete'
  15.     language               = 'en_TR'
  16.     publication_type = 'newspaper'
  17.     #extra_css              = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
  18.     #keep_only_tags    = [dict(name='font', attrs={'class':['newsDetail','agenda2NewsSpot']}),dict(name='span', attrs={'class':['agenda2Title']}),dict(name='div', attrs={'id':['gallery']})]
  19.     keep_only_tags    = [dict(name='h1', attrs={'class':['georgia_30']}),dict(name='span', attrs={'class':['left-date','detailDate','detailCName']}),dict(name='td', attrs={'id':['newsSpot','newsText']})] #resim ekleme:  ,dict(name='div', attrs={'id':['gallery','detailDate',]})
  20.  
  21.     remove_attributes = ['aria-describedby']
  22.     remove_tags  = [dict(name='img', attrs={'src':['/images/icon_print.gif','http://gmodules.com/ig/images/plus_google.gif','/images/template/jazz/agenda/i1.jpg', 'http://medya.todayszaman.com/todayszaman/images/logo/logo.bmp']}),dict(name='hr', attrs={'class':[ 'interactive-hr']}),dict(name='div', attrs={'class':[ 'empty_height_18','empty_height_9']}) ,dict(name='td', attrs={'id':[ 'superTitle']}),dict(name='span', attrs={'class':[ 't-count enabled t-count-focus']}),dict(name='a', attrs={'id':[ 'count']}),dict(name='td', attrs={'class':[ 'left-date']})  ]
  23.     cover_img_url = 'http://medya.todayszaman.com/todayszaman/images/logo/logo.bmp'
  24.     masthead_url = 'http://medya.todayszaman.com/todayszaman/images/logo/logo.bmp'
  25.     remove_empty_feeds= True
  26.    # remove_attributes = ['width','height']
  27.  
  28.     feeds          = [
  29.                       ( u'Home', u'http://www.todayszaman.com/rss?sectionId=0'),
  30.                       ( u'News', u'http://www.todayszaman.com/rss?sectionId=100'),
  31.                       ( u'Business', u'http://www.todayszaman.com/rss?sectionId=105'),
  32.                       ( u'Interviews', u'http://www.todayszaman.com/rss?sectionId=8'),
  33.                       ( u'Columnists', u'http://www.todayszaman.com/rss?sectionId=6'),
  34.                       ( u'Op-Ed', u'http://www.todayszaman.com/rss?sectionId=109'),
  35.                       ( u'Arts & Culture', u'http://www.todayszaman.com/rss?sectionId=110'),
  36.                       ( u'Expat Zone', u'http://www.todayszaman.com/rss?sectionId=132'),
  37.                       ( u'Sports', u'http://www.todayszaman.com/rss?sectionId=5'),
  38.                       ( u'Features', u'http://www.todayszaman.com/rss?sectionId=116'),
  39.                       ( u'Travel', u'http://www.todayszaman.com/rss?sectionId=117'),
  40.                       ( u'Leisure', u'http://www.todayszaman.com/rss?sectionId=118'),
  41.                       ( u'Weird But True', u'http://www.todayszaman.com/rss?sectionId=134'),
  42.                       ( u'Life', u'http://www.todayszaman.com/rss?sectionId=133'),
  43.                       ( u'Health', u'http://www.todayszaman.com/rss?sectionId=126'),
  44.                       ( u'Press Review', u'http://www.todayszaman.com/rss?sectionId=130'),
  45.                       ( u'Todays think tanks', u'http://www.todayszaman.com/rss?sectionId=159'),
  46.  
  47.                         ]
  48.  
  49.     #def preprocess_html(self, soup):
  50.      #   return self.adeify_images(soup)
  51.     #def print_version(self, url):       #there is a probem caused by table format
  52.      #return url.replace('http://www.todayszaman.com/newsDetail_getNewsById.action?load=detay&', 'http://www.todayszaman.com/newsDetail_openPrintPage.action?')
  53.  
  54.