home *** CD-ROM | disk | FTP | other *** search
/ Chip 2011 November / CHIP_2011_11.iso / Programy / Narzedzia / Calibre / calibre-0.8.18.msi / file_280 / go_comics.recipe < prev    next >
Text File  |  2011-09-09  |  31KB  |  449 lines

  1. #!/usr/bin/env  python
  2.  
  3. __license__   = 'GPL v3'
  4. __copyright__ = 'Copyright 2010 Starson17'
  5. '''
  6. www.gocomics.com
  7. '''
  8. from calibre.web.feeds.news import BasicNewsRecipe
  9. import mechanize, re
  10.  
class GoComics(BasicNewsRecipe):
    '''
    Recipe that gathers recent strips for a hand-picked set of comics from
    www.gocomics.com.

    The comics to fetch are the uncommented entries of the list in
    parse_index().  How many strips are collected per comic and how wide the
    strip images are made is controlled by num_comics_to_get and comic_size
    below.
    '''
    # Recipe metadata and fetch options.
    # NOTE(review): these names are presumably read by the BasicNewsRecipe
    # base class - confirm their exact meaning against calibre's recipe API.
    title               = 'GoComics'
    __author__          = 'Starson17'
    __version__         = '1.06'
    __date__            = '07 June 2011'
    description         = u'200+ Comics - Customize for more days/comics: Defaults to 7 days, 25 comics - 20 general, 5 editorial.'
    category            = 'news, comics'
    language            = 'en'
    use_embedded_content= False
    no_stylesheets      = True
    remove_javascript   = True
    cover_url           = 'http://paulbuckley14059.files.wordpress.com/2008/06/calvin-and-hobbes.jpg'
    remove_attributes = ['style']

    ####### USER PREFERENCES - COMICS, IMAGE SIZE AND NUMBER OF COMICS TO RETRIEVE ########
    # num_comics_to_get - I've tried up to 99 on Calvin&Hobbes
    # (make_links() fetches this many pages per comic, walking back in time)
    num_comics_to_get = 7
    # comic_size 300 is small, 600 is medium, 900 is large, 1500 is extra-large
    # (preprocess_html() writes this value into the strip image's width attribute)
    comic_size = 900
    # CHOOSE COMIC STRIPS BELOW - REMOVE COMMENT '# ' FROM IN FRONT OF DESIRED STRIPS
    # (the list of strips lives in parse_index())
    # Please do not overload their servers by selecting all comics and 1000 strips from each!

    # Reuses the description, category and language defined above.
    conversion_options = {'linearize_tables'  : True
                        , 'comment'           : description
                        , 'tags'              : category
                        , 'language'          : language
                        }

    # NOTE(review): presumably only <div class="feature"> / <div class="banner">
    # survive in the downloaded page - confirm against calibre's recipe API.
    keep_only_tags     = [dict(name='div', attrs={'class':['feature','banner']}),
                          ]

    # Navigation links, tag/share widgets, and any <a href> / <img src> whose
    # value contains "mutable_" followed by digits (matched case-insensitively).
    # NOTE(review): presumably these elements are stripped from each page -
    # confirm against calibre's recipe API.
    remove_tags = [dict(name='a', attrs={'class':['beginning','prev','cal','next','newest']}),
                   dict(name='div', attrs={'class':['tag-wrapper']}),
                   dict(name='a', attrs={'href':re.compile(r'.*mutable_[0-9]+', re.IGNORECASE)}),
                   dict(name='img', attrs={'src':re.compile(r'.*mutable_[0-9]+', re.IGNORECASE)}),
                   dict(name='ul', attrs={'class':['share-nav','feature-nav']}),
                   ]
  48.  
  49.     def get_browser(self):
  50.         br = BasicNewsRecipe.get_browser(self)
  51.         cookies = mechanize.CookieJar()
  52.         br = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies))
  53.         br.addheaders = [('Referer','http://www.gocomics.com/')]
  54.         return br
  55.  
  56.     def parse_index(self):
  57.         feeds = []
  58.         for title, url in [
  59.                        (u"2 Cows and a Chicken", u"http://www.gocomics.com/2cowsandachicken"),
  60.                        #(u"9 Chickweed Lane", u"http://www.gocomics.com/9chickweedlane"),
  61.                        (u"9 to 5", u"http://www.gocomics.com/9to5"),
  62.                        #(u"Adam At Home", u"http://www.gocomics.com/adamathome"),
  63.                        (u"Agnes", u"http://www.gocomics.com/agnes"),
  64.                        #(u"Alley Oop", u"http://www.gocomics.com/alleyoop"),
  65.                        #(u"Andy Capp", u"http://www.gocomics.com/andycapp"),
  66.                        #(u"Animal Crackers", u"http://www.gocomics.com/animalcrackers"),
  67.                        #(u"Annie", u"http://www.gocomics.com/annie"),
  68.                        #(u"Arlo & Janis", u"http://www.gocomics.com/arloandjanis"),
  69.                        #(u"Ask Shagg", u"http://www.gocomics.com/askshagg"),
  70.                        (u"B.C.", u"http://www.gocomics.com/bc"),
  71.                        #(u"Back in the Day", u"http://www.gocomics.com/backintheday"),
  72.                        #(u"Bad Reporter", u"http://www.gocomics.com/badreporter"),
  73.                        #(u"Baldo", u"http://www.gocomics.com/baldo"),
  74.                        #(u"Ballard Street", u"http://www.gocomics.com/ballardstreet"),
  75.                        #(u"Barkeater Lake", u"http://www.gocomics.com/barkeaterlake"),
  76.                        #(u"Basic Instructions", u"http://www.gocomics.com/basicinstructions"),
  77.                        #(u"Ben", u"http://www.gocomics.com/ben"),
  78.                        #(u"Betty", u"http://www.gocomics.com/betty"),
  79.                        #(u"Bewley", u"http://www.gocomics.com/bewley"),
  80.                        #(u"Big Nate", u"http://www.gocomics.com/bignate"),
  81.                        #(u"Big Top", u"http://www.gocomics.com/bigtop"),
  82.                        #(u"Biographic", u"http://www.gocomics.com/biographic"),
  83.                        #(u"Birdbrains", u"http://www.gocomics.com/birdbrains"),
  84.                        #(u"Bleeker: The Rechargeable Dog", u"http://www.gocomics.com/bleeker"),
  85.                        #(u"Bliss", u"http://www.gocomics.com/bliss"),
  86.                        (u"Bloom County", u"http://www.gocomics.com/bloomcounty"),
  87.                        #(u"Bo Nanas", u"http://www.gocomics.com/bonanas"),
  88.                        #(u"Bob the Squirrel", u"http://www.gocomics.com/bobthesquirrel"),
  89.                        #(u"Boomerangs", u"http://www.gocomics.com/boomerangs"),
  90.                        #(u"Bottomliners", u"http://www.gocomics.com/bottomliners"),
  91.                        #(u"Bound and Gagged", u"http://www.gocomics.com/boundandgagged"),
  92.                        #(u"Brainwaves", u"http://www.gocomics.com/brainwaves"),
  93.                        #(u"Brenda Starr", u"http://www.gocomics.com/brendastarr"),
  94.                        #(u"Brevity", u"http://www.gocomics.com/brevity"),
  95.                        #(u"Brewster Rockit", u"http://www.gocomics.com/brewsterrockit"),
  96.                        #(u"Broom Hilda", u"http://www.gocomics.com/broomhilda"),
  97.                        (u"Calvin and Hobbes", u"http://www.gocomics.com/calvinandhobbes"),
  98.                        #(u"Candorville", u"http://www.gocomics.com/candorville"),
  99.                        #(u"Cathy", u"http://www.gocomics.com/cathy"),
  100.                        #(u"C'est la Vie", u"http://www.gocomics.com/cestlavie"),
  101.                        #(u"Cheap Thrills", u"http://www.gocomics.com/cheapthrills"),
  102.                        #(u"Chuckle Bros", u"http://www.gocomics.com/chucklebros"),
  103.                        #(u"Citizen Dog", u"http://www.gocomics.com/citizendog"),
  104.                        #(u"Cleats", u"http://www.gocomics.com/cleats"),
  105.                        #(u"Close to Home", u"http://www.gocomics.com/closetohome"),
  106.                        #(u"Committed", u"http://www.gocomics.com/committed"),
  107.                        #(u"Compu-toon", u"http://www.gocomics.com/compu-toon"),
  108.                        #(u"Cornered", u"http://www.gocomics.com/cornered"),
  109.                        #(u"Cow & Boy", u"http://www.gocomics.com/cow&boy"),
  110.                        #(u"Cul de Sac", u"http://www.gocomics.com/culdesac"),
  111.                        #(u"Daddy's Home", u"http://www.gocomics.com/daddyshome"),
  112.                        #(u"Deep Cover", u"http://www.gocomics.com/deepcover"),
  113.                        #(u"Dick Tracy", u"http://www.gocomics.com/dicktracy"),
  114.                        (u"Dog Eat Doug", u"http://www.gocomics.com/dogeatdoug"),
  115.                        #(u"Domestic Abuse", u"http://www.gocomics.com/domesticabuse"),
  116.                        (u"Doodles", u"http://www.gocomics.com/doodles"),
  117.                        (u"Doonesbury", u"http://www.gocomics.com/doonesbury"),
  118.                        #(u"Drabble", u"http://www.gocomics.com/drabble"),
  119.                        #(u"Eek!", u"http://www.gocomics.com/eek"),
  120.                        #(u"F Minus", u"http://www.gocomics.com/fminus"),
  121.                        #(u"Family Tree", u"http://www.gocomics.com/familytree"),
  122.                        #(u"Farcus", u"http://www.gocomics.com/farcus"),
  123.                        (u"Fat Cats Classics", u"http://www.gocomics.com/fatcatsclassics"),
  124.                        #(u"Ferd'nand", u"http://www.gocomics.com/ferdnand"),
  125.                        #(u"Flight Deck", u"http://www.gocomics.com/flightdeck"),
  126.                        (u"Flo and Friends", u"http://www.gocomics.com/floandfriends"),
  127.                        #(u"For Better or For Worse", u"http://www.gocomics.com/forbetterorforworse"),
  128.                        #(u"For Heaven's Sake", u"http://www.gocomics.com/forheavenssake"),
  129.                        #(u"Fort Knox", u"http://www.gocomics.com/fortknox"),
  130.                        #(u"FoxTrot Classics", u"http://www.gocomics.com/foxtrotclassics"),
  131.                        (u"FoxTrot", u"http://www.gocomics.com/foxtrot"),
  132.                        #(u"Frank & Ernest", u"http://www.gocomics.com/frankandernest"),
  133.                        #(u"Frazz", u"http://www.gocomics.com/frazz"),
  134.                        #(u"Fred Basset", u"http://www.gocomics.com/fredbasset"),
  135.                        #(u"Free Range", u"http://www.gocomics.com/freerange"),
  136.                        #(u"Frog Applause", u"http://www.gocomics.com/frogapplause"),
  137.                        #(u"Garfield Minus Garfield", u"http://www.gocomics.com/garfieldminusgarfield"),
  138.                        (u"Garfield", u"http://www.gocomics.com/garfield"),
  139.                        #(u"Gasoline Alley", u"http://www.gocomics.com/gasolinealley"),
  140.                        #(u"Geech Classics", u"http://www.gocomics.com/geechclassics"),
  141.                        #(u"Get Fuzzy", u"http://www.gocomics.com/getfuzzy"),
  142.                        #(u"Gil Thorp", u"http://www.gocomics.com/gilthorp"),
  143.                        #(u"Ginger Meggs", u"http://www.gocomics.com/gingermeggs"),
  144.                        #(u"Girls & Sports", u"http://www.gocomics.com/girlsandsports"),
  145.                        #(u"Graffiti", u"http://www.gocomics.com/graffiti"),
  146.                        #(u"Grand Avenue", u"http://www.gocomics.com/grandavenue"),
  147.                        #(u"Haiku Ewe", u"http://www.gocomics.com/haikuewe"),
  148.                        #(u"Heart of the City", u"http://www.gocomics.com/heartofthecity"),
  149.                        (u"Heathcliff", u"http://www.gocomics.com/heathcliff"),
  150.                        #(u"Herb and Jamaal", u"http://www.gocomics.com/herbandjamaal"),
  151.                        #(u"Herman", u"http://www.gocomics.com/herman"),
  152.                        #(u"Home and Away", u"http://www.gocomics.com/homeandaway"),
  153.                        #(u"Housebroken", u"http://www.gocomics.com/housebroken"),
  154.                        #(u"Hubert and Abby", u"http://www.gocomics.com/hubertandabby"),
  155.                        #(u"Imagine This", u"http://www.gocomics.com/imaginethis"),
  156.                        #(u"In the Bleachers", u"http://www.gocomics.com/inthebleachers"),
  157.                        #(u"In the Sticks", u"http://www.gocomics.com/inthesticks"),
  158.                        #(u"Ink Pen", u"http://www.gocomics.com/inkpen"),
  159.                        #(u"It's All About You", u"http://www.gocomics.com/itsallaboutyou"),
  160.                        #(u"Jane's World", u"http://www.gocomics.com/janesworld"),
  161.                        #(u"Joe Vanilla", u"http://www.gocomics.com/joevanilla"),
  162.                        #(u"Jump Start", u"http://www.gocomics.com/jumpstart"),
  163.                        #(u"Kit 'N' Carlyle", u"http://www.gocomics.com/kitandcarlyle"),
  164.                        #(u"La Cucaracha", u"http://www.gocomics.com/lacucaracha"),
  165.                        #(u"Last Kiss", u"http://www.gocomics.com/lastkiss"),
  166.                        #(u"Legend of Bill", u"http://www.gocomics.com/legendofbill"),
  167.                        #(u"Liberty Meadows", u"http://www.gocomics.com/libertymeadows"),
  168.                        #(u"Li'l Abner Classics", u"http://www.gocomics.com/lilabnerclassics"),
  169.                        #(u"Lio", u"http://www.gocomics.com/lio"),
  170.                        #(u"Little Dog Lost", u"http://www.gocomics.com/littledoglost"),
  171.                        #(u"Little Otto", u"http://www.gocomics.com/littleotto"),
  172.                        #(u"Lola", u"http://www.gocomics.com/lola"),
  173.                        #(u"Loose Parts", u"http://www.gocomics.com/looseparts"),
  174.                        #(u"Love Is...", u"http://www.gocomics.com/loveis"),
  175.                        #(u"Luann", u"http://www.gocomics.com/luann"),
  176.                        #(u"Maintaining", u"http://www.gocomics.com/maintaining"),
  177.                        (u"Marmaduke", u"http://www.gocomics.com/marmaduke"),
  178.                        #(u"Meg! Classics", u"http://www.gocomics.com/megclassics"),
  179.                        #(u"Middle-Aged White Guy", u"http://www.gocomics.com/middleagedwhiteguy"),
  180.                        #(u"Minimum Security", u"http://www.gocomics.com/minimumsecurity"),
  181.                        #(u"Moderately Confused", u"http://www.gocomics.com/moderatelyconfused"),
  182.                        (u"Momma", u"http://www.gocomics.com/momma"),
  183.                        #(u"Monty", u"http://www.gocomics.com/monty"),
  184.                        #(u"Motley Classics", u"http://www.gocomics.com/motleyclassics"),
  185.                        (u"Mutt & Jeff", u"http://www.gocomics.com/muttandjeff"),
  186.                        #(u"Mythtickle", u"http://www.gocomics.com/mythtickle"),
  187.                        #(u"Nancy", u"http://www.gocomics.com/nancy"),
  188.                        #(u"Natural Selection", u"http://www.gocomics.com/naturalselection"),
  189.                        #(u"Nest Heads", u"http://www.gocomics.com/nestheads"),
  190.                        #(u"NEUROTICA", u"http://www.gocomics.com/neurotica"),
  191.                        #(u"New Adventures of Queen Victoria", u"http://www.gocomics.com/thenewadventuresofqueenvictoria"),
  192.                        #(u"Non Sequitur", u"http://www.gocomics.com/nonsequitur"),
  193.                        #(u"Off The Mark", u"http://www.gocomics.com/offthemark"),
  194.                        #(u"On A Claire Day", u"http://www.gocomics.com/onaclaireday"),
  195.                        #(u"One Big Happy Classics", u"http://www.gocomics.com/onebighappyclassics"),
  196.                        #(u"One Big Happy", u"http://www.gocomics.com/onebighappy"),
  197.                        #(u"Out of the Gene Pool Re-Runs", u"http://www.gocomics.com/outofthegenepool"),
  198.                        #(u"Over the Hedge", u"http://www.gocomics.com/overthehedge"),
  199.                        #(u"Overboard", u"http://www.gocomics.com/overboard"),
  200.                        #(u"PC and Pixel", u"http://www.gocomics.com/pcandpixel"),
  201.                        (u"Peanuts", u"http://www.gocomics.com/peanuts"),
  202.                        #(u"Pearls Before Swine", u"http://www.gocomics.com/pearlsbeforeswine"),
  203.                        #(u"Pibgorn Sketches", u"http://www.gocomics.com/pibgornsketches"),
  204.                        #(u"Pibgorn", u"http://www.gocomics.com/pibgorn"),
  205.                        (u"Pickles", u"http://www.gocomics.com/pickles"),
  206.                        #(u"Pinkerton", u"http://www.gocomics.com/pinkerton"),
  207.                        #(u"Pluggers", u"http://www.gocomics.com/pluggers"),
  208.                        #(u"Pooch Cafe", u"http://www.gocomics.com/poochcafe"),
  209.                        #(u"PreTeena", u"http://www.gocomics.com/preteena"),
  210.                        #(u"Prickly City", u"http://www.gocomics.com/pricklycity"),
  211.                        #(u"Rabbits Against Magic", u"http://www.gocomics.com/rabbitsagainstmagic"),
  212.                        #(u"Raising Duncan Classics", u"http://www.gocomics.com/raisingduncanclassics"),
  213.                        #(u"Real Life Adventures", u"http://www.gocomics.com/reallifeadventures"),
  214.                        #(u"Reality Check", u"http://www.gocomics.com/realitycheck"),
  215.                        #(u"Red and Rover", u"http://www.gocomics.com/redandrover"),
  216.                        #(u"Red Meat", u"http://www.gocomics.com/redmeat"),
  217.                        #(u"Reynolds Unwrapped", u"http://www.gocomics.com/reynoldsunwrapped"),
  218.                        #(u"Rip Haywire", u"http://www.gocomics.com/riphaywire"),
  219.                        #(u"Ripley's Believe It or Not!", u"http://www.gocomics.com/ripleysbelieveitornot"),
  220.                        #(u"Ronaldinho Gaucho", u"http://www.gocomics.com/ronaldinhogaucho"),
  221.                        #(u"Rose Is Rose", u"http://www.gocomics.com/roseisrose"),
  222.                        #(u"Rubes", u"http://www.gocomics.com/rubes"),
  223.                        #(u"Rudy Park", u"http://www.gocomics.com/rudypark"),
  224.                        #(u"Scary Gary", u"http://www.gocomics.com/scarygary"),
  225.                        #(u"Shirley and Son Classics", u"http://www.gocomics.com/shirleyandsonclassics"),
  226.                        #(u"Shoe", u"http://www.gocomics.com/shoe"),
  227.                        #(u"Shoecabbage", u"http://www.gocomics.com/shoecabbage"),
  228.                        #(u"Skin Horse", u"http://www.gocomics.com/skinhorse"),
  229.                        #(u"Slowpoke", u"http://www.gocomics.com/slowpoke"),
  230.                        #(u"Soup To Nutz", u"http://www.gocomics.com/souptonutz"),
  231.                        #(u"Speed Bump", u"http://www.gocomics.com/speedbump"),
  232.                        #(u"Spot The Frog", u"http://www.gocomics.com/spotthefrog"),
  233.                        #(u"State of the Union", u"http://www.gocomics.com/stateoftheunion"),
  234.                        #(u"Stone Soup", u"http://www.gocomics.com/stonesoup"),
  235.                        #(u"Strange Brew", u"http://www.gocomics.com/strangebrew"),
  236.                        #(u"Sylvia", u"http://www.gocomics.com/sylvia"),
  237.                        #(u"Tank McNamara", u"http://www.gocomics.com/tankmcnamara"),
  238.                        #(u"Tarzan Classics", u"http://www.gocomics.com/tarzanclassics"),
  239.                        #(u"That's Life", u"http://www.gocomics.com/thatslife"),
  240.                        #(u"The Academia Waltz", u"http://www.gocomics.com/academiawaltz"),
  241.                        #(u"The Argyle Sweater", u"http://www.gocomics.com/theargylesweater"),
  242.                        #(u"The Barn", u"http://www.gocomics.com/thebarn"),
  243.                        #(u"The Boiling Point", u"http://www.gocomics.com/theboilingpoint"),
  244.                        #(u"The Boondocks", u"http://www.gocomics.com/boondocks"),
  245.                        #(u"The Born Loser", u"http://www.gocomics.com/thebornloser"),
  246.                        #(u"The Buckets", u"http://www.gocomics.com/thebuckets"),
  247.                        #(u"The City", u"http://www.gocomics.com/thecity"),
  248.                        #(u"The Dinette Set", u"http://www.gocomics.com/dinetteset"),
  249.                        #(u"The Doozies", u"http://www.gocomics.com/thedoozies"),
  250.                        #(u"The Duplex", u"http://www.gocomics.com/duplex"),
  251.                        #(u"The Elderberries", u"http://www.gocomics.com/theelderberries"),
  252.                        #(u"The Flying McCoys", u"http://www.gocomics.com/theflyingmccoys"),
  253.                        #(u"The Fusco Brothers", u"http://www.gocomics.com/thefuscobrothers"),
  254.                        #(u"The Grizzwells", u"http://www.gocomics.com/thegrizzwells"),
  255.                        #(u"The Humble Stumble", u"http://www.gocomics.com/thehumblestumble"),
  256.                        #(u"The Knight Life", u"http://www.gocomics.com/theknightlife"),
  257.                        #(u"The Meaning of Lila", u"http://www.gocomics.com/meaningoflila"),
  258.                        #(u"The Middletons", u"http://www.gocomics.com/themiddletons"),
  259.                        #(u"The Norm", u"http://www.gocomics.com/thenorm"),
  260.                        #(u"The Other Coast", u"http://www.gocomics.com/theothercoast"),
  261.                        #(u"The Quigmans", u"http://www.gocomics.com/thequigmans"),
  262.                        #(u"The Sunshine Club", u"http://www.gocomics.com/thesunshineclub"),
  263.                        #(u"Tiny Sepuk", u"http://www.gocomics.com/tinysepuk"),
  264.                        #(u"TOBY", u"http://www.gocomics.com/toby"),
  265.                        #(u"Tom the Dancing Bug", u"http://www.gocomics.com/tomthedancingbug"),
  266.                        #(u"Too Much Coffee Man", u"http://www.gocomics.com/toomuchcoffeeman"),
  267.                        #(u"Unstrange Phenomena", u"http://www.gocomics.com/unstrangephenomena"),
  268.                        #(u"W.T. Duck", u"http://www.gocomics.com/wtduck"),
  269.                        #(u"Watch Your Head", u"http://www.gocomics.com/watchyourhead"),
  270.                        #(u"Wee Pals", u"http://www.gocomics.com/weepals"),
  271.                        #(u"Winnie the Pooh", u"http://www.gocomics.com/winniethepooh"),
  272.                        #(u"Wizard of Id", u"http://www.gocomics.com/wizardofid"),
  273.                        #(u"Working Daze", u"http://www.gocomics.com/workingdaze"),
  274.                        #(u"Working It Out", u"http://www.gocomics.com/workingitout"),
  275.                        #(u"Yenny", u"http://www.gocomics.com/yenny"),
  276.                        #(u"Zack Hill", u"http://www.gocomics.com/zackhill"),
  277.                        (u"Ziggy", u"http://www.gocomics.com/ziggy"),
  278.                        #
  279.                        ######## EDITORIAL CARTOONS #####################
  280.                        (u"Adam Zyglis", u"http://www.gocomics.com/adamzyglis"),
  281.                        #(u"Andy Singer", u"http://www.gocomics.com/andysinger"),
  282.                        #(u"Ben Sargent",u"http://www.gocomics.com/bensargent"),
  283.                        #(u"Bill Day", u"http://www.gocomics.com/billday"),
  284.                        #(u"Bill Schorr", u"http://www.gocomics.com/billschorr"),
  285.                        #(u"Bob Englehart", u"http://www.gocomics.com/bobenglehart"),
  286.                        (u"Bob Gorrell",u"http://www.gocomics.com/bobgorrell"),
  287.                        #(u"Brian Fairrington", u"http://www.gocomics.com/brianfairrington"),
  288.                        #(u"Bruce Beattie", u"http://www.gocomics.com/brucebeattie"),
  289.                        #(u"Cam Cardow", u"http://www.gocomics.com/camcardow"),
  290.                        #(u"Chan Lowe",u"http://www.gocomics.com/chanlowe"),
  291.                        #(u"Chip Bok",u"http://www.gocomics.com/chipbok"),
  292.                        #(u"Chris Britt",u"http://www.gocomics.com/chrisbritt"),
  293.                        #(u"Chuck Asay",u"http://www.gocomics.com/chuckasay"),
  294.                        #(u"Clay Bennett",u"http://www.gocomics.com/claybennett"),
  295.                        #(u"Clay Jones",u"http://www.gocomics.com/clayjones"),
  296.                        #(u"Dan Wasserman",u"http://www.gocomics.com/danwasserman"),
  297.                        #(u"Dana Summers",u"http://www.gocomics.com/danasummers"),
  298.                        #(u"Daryl Cagle", u"http://www.gocomics.com/darylcagle"),
  299.                        #(u"David Fitzsimmons", u"http://www.gocomics.com/davidfitzsimmons"),
  300.                        (u"Dick Locher",u"http://www.gocomics.com/dicklocher"),
  301.                        #(u"Don Wright",u"http://www.gocomics.com/donwright"),
  302.                        #(u"Donna Barstow",u"http://www.gocomics.com/donnabarstow"),
  303.                        #(u"Drew Litton", u"http://www.gocomics.com/drewlitton"),
  304.                        #(u"Drew Sheneman",u"http://www.gocomics.com/drewsheneman"),
  305.                        #(u"Ed Stein", u"http://www.gocomics.com/edstein"),
  306.                        #(u"Eric Allie", u"http://www.gocomics.com/ericallie"),
  307.                        #(u"Gary Markstein", u"http://www.gocomics.com/garymarkstein"),
  308.                        #(u"Gary McCoy", u"http://www.gocomics.com/garymccoy"),
  309.                        #(u"Gary Varvel", u"http://www.gocomics.com/garyvarvel"),
  310.                        #(u"Glenn McCoy",u"http://www.gocomics.com/glennmccoy"),
  311.                        #(u"Henry Payne", u"http://www.gocomics.com/henrypayne"),
  312.                        #(u"Jack Ohman",u"http://www.gocomics.com/jackohman"),
  313.                        #(u"JD Crowe", u"http://www.gocomics.com/jdcrowe"),
  314.                        #(u"Jeff Danziger",u"http://www.gocomics.com/jeffdanziger"),
  315.                        #(u"Jeff Parker", u"http://www.gocomics.com/jeffparker"),
  316.                        #(u"Jeff Stahler", u"http://www.gocomics.com/jeffstahler"),
  317.                        #(u"Jerry Holbert", u"http://www.gocomics.com/jerryholbert"),
  318.                        #(u"Jim Morin",u"http://www.gocomics.com/jimmorin"),
  319.                        #(u"Joel Pett",u"http://www.gocomics.com/joelpett"),
  320.                        #(u"John Cole", u"http://www.gocomics.com/johncole"),
  321.                        #(u"John Darkow", u"http://www.gocomics.com/johndarkow"),
  322.                        #(u"John Deering",u"http://www.gocomics.com/johndeering"),
  323.                        #(u"John Sherffius", u"http://www.gocomics.com/johnsherffius"),
  324.                        #(u"Ken Catalino",u"http://www.gocomics.com/kencatalino"),
  325.                        #(u"Kerry Waghorn",u"http://www.gocomics.com/facesinthenews"),
  326.                        #(u"Kevin Kallaugher",u"http://www.gocomics.com/kevinkallaugher"),
  327.                        #(u"Lalo Alcaraz",u"http://www.gocomics.com/laloalcaraz"),
  328.                        #(u"Larry Wright", u"http://www.gocomics.com/larrywright"),
  329.                        #(u"Lisa Benson", u"http://www.gocomics.com/lisabenson"),
  330.                        #(u"Marshall Ramsey", u"http://www.gocomics.com/marshallramsey"),
  331.                        #(u"Matt Bors", u"http://www.gocomics.com/mattbors"),
  332.                        #(u"Matt Davies",u"http://www.gocomics.com/mattdavies"),
  333.                        #(u"Michael Ramirez", u"http://www.gocomics.com/michaelramirez"),
  334.                        #(u"Mike Keefe", u"http://www.gocomics.com/mikekeefe"),
  335.                        #(u"Mike Luckovich", u"http://www.gocomics.com/mikeluckovich"),
  336.                        #(u"MIke Thompson", u"http://www.gocomics.com/mikethompson"),
  337.                        #(u"Monte Wolverton", u"http://www.gocomics.com/montewolverton"),
  338.                        #(u"Mr. Fish", u"http://www.gocomics.com/mrfish"),
  339.                        #(u"Nate Beeler", u"http://www.gocomics.com/natebeeler"),
  340.                        #(u"Nick Anderson", u"http://www.gocomics.com/nickanderson"),
  341.                        #(u"Pat Bagley", u"http://www.gocomics.com/patbagley"),
  342.                        #(u"Pat Oliphant",u"http://www.gocomics.com/patoliphant"),
  343.                        #(u"Paul Conrad",u"http://www.gocomics.com/paulconrad"),
  344.                        #(u"Paul Szep", u"http://www.gocomics.com/paulszep"),
  345.                        #(u"RJ Matson", u"http://www.gocomics.com/rjmatson"),
  346.                        #(u"Rob Rogers", u"http://www.gocomics.com/robrogers"),
  347.                        #(u"Robert Ariail", u"http://www.gocomics.com/robertariail"),
  348.                        #(u"Scott Stantis", u"http://www.gocomics.com/scottstantis"),
  349.                        #(u"Signe Wilkinson", u"http://www.gocomics.com/signewilkinson"),
  350.                        #(u"Small World",u"http://www.gocomics.com/smallworld"),
  351.                        #(u"Steve Benson", u"http://www.gocomics.com/stevebenson"),
  352.                        #(u"Steve Breen", u"http://www.gocomics.com/stevebreen"),
  353.                        #(u"Steve Kelley", u"http://www.gocomics.com/stevekelley"),
  354.                        #(u"Steve Sack", u"http://www.gocomics.com/stevesack"),
  355.                        #(u"Stuart Carlson",u"http://www.gocomics.com/stuartcarlson"),
  356.                        #(u"Ted Rall",u"http://www.gocomics.com/tedrall"),
  357.                        #(u"(Th)ink", u"http://www.gocomics.com/think"),
  358.                        #(u"Tom Toles",u"http://www.gocomics.com/tomtoles"),
  359.                        (u"Tony Auth",u"http://www.gocomics.com/tonyauth"),
  360.                        #(u"Views of the World",u"http://www.gocomics.com/viewsoftheworld"),
  361.                        #(u"ViewsAfrica",u"http://www.gocomics.com/viewsafrica"),
  362.                        #(u"ViewsAmerica",u"http://www.gocomics.com/viewsamerica"),
  363.                        #(u"ViewsAsia",u"http://www.gocomics.com/viewsasia"),
  364.                        #(u"ViewsBusiness",u"http://www.gocomics.com/viewsbusiness"),
  365.                        #(u"ViewsEurope",u"http://www.gocomics.com/viewseurope"),
  366.                        #(u"ViewsLatinAmerica",u"http://www.gocomics.com/viewslatinamerica"),
  367.                        #(u"ViewsMidEast",u"http://www.gocomics.com/viewsmideast"),
  368.                        (u"Walt Handelsman",u"http://www.gocomics.com/walthandelsman"),
  369.                        #(u"Wayne Stayskal",u"http://www.gocomics.com/waynestayskal"),
  370.                        #(u"Wit of the World",u"http://www.gocomics.com/witoftheworld"),
  371.                              ]:
  372.             print 'Working on: ', title
  373.             articles = self.make_links(url)
  374.             if articles:
  375.                 feeds.append((title, articles))
  376.         return feeds
  377.  
  378.     def make_links(self, url):
  379.         title = 'Temp'
  380.         current_articles = []
  381.         pages = range(1, self.num_comics_to_get+1)
  382.         for page in pages:
  383.             page_soup = self.index_to_soup(url)
  384.             if page_soup:
  385.                 try:
  386.                   strip_title = page_soup.find(name='div', attrs={'class':'top'}).h1.a.string
  387.                 except:
  388.                   strip_title = 'Error - no Title found'
  389.                 try:
  390.                   date_title = page_soup.find('ul', attrs={'class': 'feature-nav'}).li.string
  391.                   if not date_title:
  392.                       date_title = page_soup.find('ul', attrs={'class': 'feature-nav'}).li.string
  393.                 except:
  394.                   date_title = 'Error - no Date found'
  395.                 title = strip_title + ' - ' + date_title
  396.                 for i in range(2):
  397.                   try:
  398.                     strip_url_date = page_soup.find(name='div', attrs={'class':'top'}).h1.a['href']
  399.                     break #success - this is normal exit
  400.                   except:
  401.                     strip_url_date = None
  402.                     continue #try to get strip_url_date again
  403.                 for i in range(2):
  404.                   try:
  405.                     prev_strip_url_date = page_soup.find('a', attrs={'class': 'prev'})['href']
  406.                     break #success - this is normal exit
  407.                   except:
  408.                     prev_strip_url_date = None
  409.                     continue #try to get prev_strip_url_date again
  410.                 if strip_url_date:
  411.                   page_url = 'http://www.gocomics.com' + strip_url_date
  412.                 else:
  413.                   continue
  414.                 if prev_strip_url_date:
  415.                   prev_page_url = 'http://www.gocomics.com' + prev_strip_url_date
  416.                 else:
  417.                   continue
  418.             current_articles.append({'title': title, 'url': page_url, 'description':'', 'date':''})
  419.             url = prev_page_url
  420.         current_articles.reverse()
  421.         return current_articles
  422.  
  423.     def preprocess_html(self, soup):
  424.         if soup.title:
  425.             title_string = soup.title.string.strip()
  426.             _cd = title_string.split(',',1)[1]
  427.             comic_date = ' '.join(_cd.split(' ', 4)[0:-1])
  428.         if soup.h1.span:
  429.             artist = soup.h1.span.string
  430.             soup.h1.span.string.replaceWith(comic_date + artist)
  431.         feature_item = soup.find('p',attrs={'class':'feature_item'})
  432.         if feature_item.a:
  433.             a_tag = feature_item.a
  434.             a_href = a_tag["href"]
  435.             img_tag = a_tag.img
  436.             img_tag["src"] = a_href
  437.             img_tag["width"] = self.comic_size
  438.             img_tag["height"] = None
  439.         return self.adeify_images(soup)
  440.  
    # Style rules for headings, paragraphs, body text and images; images are
    # given both max-width and min-width of 100%.
    # NOTE(review): presumably applied by calibre to the generated pages -
    # confirm against calibre's recipe API.
    extra_css = '''
                    h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
                    h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
                    img {max-width:100%; min-width:100%;}
                    p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
                    body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
        '''
  448.  
  449.