home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Chip 2011 November
/
CHIP_2011_11.iso
/
Programy
/
Narzedzia
/
Calibre
/
calibre-0.8.18.msi
/
file_280
/
msdnmag_en.recipe
< prev
next >
Wrap
Text File
|
2011-09-09
|
2KB
|
66 lines
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
msdn.microsoft.com/en-us/magazine
'''
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup
class MSDNMagazine_en(BasicNewsRecipe):
    """Calibre news recipe for the English edition of MSDN Magazine.

    The issue name and the cover image are scraped from the magazine landing
    page (``base_url``); the list of articles is read from the RSS feed
    (``rss_url``). Everything is returned as a single feed named after the
    issue.
    """

    title = 'MSDN Magazine'
    __author__ = 'Darko Miletic'
    description = 'The Microsoft Journal for Developers'
    masthead_url = 'http://i3.msdn.microsoft.com/Platform/MasterPages/MsdnMagazine/smalllogo.png'
    publisher = 'Microsoft Press'
    category = 'news, IT, Microsoft, programming, windows'
    oldest_article = 31
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'
    language = 'en'

    # Landing page: source of the issue name and of the cover image.
    base_url = 'http://msdn.microsoft.com/en-us/magazine/default.aspx'
    # RSS feed: source of the article list.
    rss_url = 'http://msdn.microsoft.com/en-us/magazine/rss/default.aspx?z=z&iss=1'

    # Keep only the main content container of each article page ...
    keep_only_tags = [dict(name='div', attrs={'id': 'MainContent'})]
    # ... and drop the rating and sharing widgets inside it.
    remove_tags = [
        dict(name='div', attrs={'class': 'DivRatingsOnly'}),
        dict(name='div', attrs={'class': 'ShareThisButton4'}),
    ]

    def find_articles(self):
        """Yield one article dict for every ``<item>`` of the RSS feed.

        Each dict has the keys ``title``, ``url``, ``description`` and
        ``date``, all filled with the text of the corresponding feed element.
        """
        idx_contents = self.browser.open(self.rss_url).read()
        idx = BeautifulStoneSoup(
            idx_contents, convertEntities=BeautifulStoneSoup.XML_ENTITIES)
        for article in idx.findAll('item'):
            # The description text is parsed a second time, as HTML, so that
            # only its plain text ends up in the article summary.
            desc_html = self.tag_to_string(article.find('description'))
            description = self.tag_to_string(BeautifulSoup(desc_html))
            yield {
                'title': self.tag_to_string(article.find('title')),
                'url': self.tag_to_string(article.find('link')),
                'description': description,
                'date': self.tag_to_string(article.find('pubdate')),
            }

    def parse_index(self):
        """Build the recipe index: a single feed holding the current issue.

        Returns a one-element list ``[(issue_name, articles)]`` where
        ``issue_name`` is the text of the first ``<h1>`` on the landing page
        and ``articles`` is the materialised output of ``find_articles``.
        As a side effect ``self.cover_url`` is set when a cover image can be
        identified.
        """
        soup = self.index_to_soup(self.base_url)
        # Issue name, e.g. "August 2011".
        issue_name = self.tag_to_string(soup.find('h1'))

        # The cover is the image whose alt text equals the issue name. Skip
        # the search when no issue name was found: an empty name would match
        # any image that merely has an empty alt attribute.
        if issue_name:
            img = soup.find('img', attrs={'alt': issue_name})
            # The cover is optional, so a matching image without a src
            # attribute must not abort the whole download with a KeyError.
            cover = img.get('src') if img is not None else None
            if cover:
                self.cover_url = cover

        return [(issue_name, list(self.find_articles()))]