home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / billorielly.recipe < prev    next >
Text File  |  2011-09-09  |  3KB  |  87 lines

  1. #!/usr/bin/env  python
  2.  
  3. # ebook-convert.exe c:\billorielly.recipe c:\test -vv
  4.  
  5. from calibre.web.feeds.recipes import BasicNewsRecipe
  6.  
  7. class BillOReilly(BasicNewsRecipe):
  8.     cover_url             = 'http://images.billoreilly.com/images/headers/borbanner.jpg'
  9.     title                      = u"Bill O'Reilly"
  10.     __author__               = 'Rob Lammert - rob.lammert[at]gmail.com'
  11.     description                = u"Articles from Bill O'Reilly's website and his Fox New's website"
  12.     language = 'en'
  13.     oldest_article        = 7.0
  14.     max_articles_per_feed = 100
  15.     recursions            = 0
  16.     encoding              = 'utf8'
  17.     no_stylesheets        = True
  18.     remove_javascript     = True
  19.     #use_embedded_content  = False
  20.  
  21.  
  22.  #   feeds                 = [
  23.  #     ('Talking Points Memo', u'http://www.foxnews.com/xmlfeed/column/0,5184,19,00.rss'),
  24.  #     ('No Spin News', u'http://www.billoreilly.com/blog?rss=true&size=50&useBlurbs=true&categoryID=7')
  25.  #   ]
  26.  
  27.     def parse_index(self):
  28.       feeds = []
  29.  
  30.       articles_shows = self.bo_parse_shows('http://www.billoreilly.com/show?action=tvShowArchive')
  31.       articles_columns = self.bo_parse_columns('http://www.billoreilly.com/columns')
  32.  
  33.       if articles_shows:
  34.         feeds.append(("O'Reilly Factor", articles_shows))
  35.  
  36.       if articles_columns:
  37.         feeds.append(("Newspaper Column", articles_columns))
  38.  
  39.       return feeds
  40.  
  41.     def bo_parse_shows(self,url):
  42.       soup = self.index_to_soup(url)
  43.       links = soup.find(attrs={'class': 'showLinks'})
  44.  
  45.       current_articles = []
  46.       counter = 0
  47.       for lnk in links.findAllNext(attrs={'class': ['showLinks']}):
  48.         if counter <= 5:
  49.           title = self.tag_to_string(lnk)
  50.           url = lnk.get('href', False)
  51.  
  52.           if not url or not title:
  53.             continue
  54.  
  55.           if url.startswith('/'):
  56.             url = 'http://www.billoreilly.com'+url+'&dest=/pg/jsp/community/tvshowprint.jsp'
  57.  
  58.           self.log('\t\tFound article:', title)
  59.           self.log('\t\t\t', url)
  60.           current_articles.append({'title': title, 'url': url, 'description':'', 'date':''})
  61.           counter += 1
  62.       return current_articles
  63.  
  64.     def bo_parse_columns(self,url):
  65.       soup = self.index_to_soup(url)
  66.       links = soup.find(attrs={'id': 'bold'})
  67.  
  68.       current_articles = []
  69.       counter = 0
  70.       for lnk in links.findAllNext(attrs={'id': ['bold']}):
  71.         test = lnk.get('class', False)
  72.         if counter <= 5 and test == 'defaultLinks':
  73.           title = self.tag_to_string(lnk)
  74.           url = lnk.get('href', False)
  75.  
  76.           if not url or not title:
  77.             continue
  78.  
  79.           if url.startswith('/'):
  80.             url = 'http://www.billoreilly.com'+url+'&printerFriendly=true"'
  81.  
  82.           self.log('\t\tFound article:', title)
  83.           self.log('\t\t\t', url)
  84.           current_articles.append({'title': title, 'url': url, 'description':'', 'date':''})
  85.           counter += 1
  86.       return current_articles
  87.