Personal Computer World 2008 February

home *** CD-ROM | disk | FTP | other *** search

/ Personal Computer World 2008 February / PCWFEB08.iso / Software / Freeware / Miro 1.0 / Miro_Installer.exe / xulrunner / python / olddatabaseupgrade.py < prev next >

Wrap

Python Source | 2007-11-12 | 19.3 KB | 561 lines

# Miro - an RSS based video player application # Copyright (C) 2005-2007 Participatory Culture Foundation # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA """Module used to upgrade from databases before we had our current scheme. Strategy: * Unpickle old databases using a subclass of pickle.Unpickle that loads fake class objects for all our DDBObjects. The fake classes are just empty shells with the upgrade code that existed when we added the schema module. * Save those objects to disk, using the initial schema of the new system. """ from new import classobj from copy import copy from datetime import datetime import pickle import shutil import threading import types import time from schema import ObjectSchema, SchemaInt, SchemaFloat, SchemaSimpleItem from schema import SchemaObject, SchemaBool, SchemaDateTime, SchemaTimeDelta from schema import SchemaList, SchemaDict from fasttypes import LinkedList from types import NoneType import storedatabase ######################### STAGE 1 helpers ############################# # Below is a snapshot of what the database looked like at 0.8.2. DDBObject # classes and other classes that get saved in the database are present only as # skeletons, all we want from them is their __setstate__ method. # # The __setstate_ methods are almost exactly like they were in 0.8.2. I # removed some things that don't apply to us simple restoring, then saving the # database (starting a Thread, sending messages to the downloader daemon, # etc.). I added some things to make things compatible with our schema, # mostly this means setting attributes to None, where before we used the fact # that access the attribute would throw an AttributeError (ugh!). # # We prepend "Old" to the DDBObject so they're easy to recognize if # somehow they slip through to a real database # # ObjectSchema # classes are exactly as they appeared in version 6 of the schema. # # Why version 6? # Previous versions were in RC's. They dropped some of the data that we # need to import from old databases By making olddatabaseupgrade start on # version 6 we avoid that bug, while still giving the people using version 1 # and 2 an upgrade path that does something. def defaultFeedIconURL(): import resources return resources.url("images/feedicon.png") #Dummy class for removing bogus FileItem instances class DropItLikeItsHot(object): __DropMeLikeItsHot = True def __slurp(self, *args, **kwargs): pass def __getattr__(self, attr): if attr == '__DropMeLikeItsHot': return self.__DropMeLikeItsHot else: print "DTV: WARNING! Attempt to call '%s' on DropItLikeItsHot instance" % attr import traceback traceback.print_stack() return self.__slurp __setstate__ = __slurp def __repr__(self): return "DropMeLikeItsHot" def __str__(self): return "DropMeLikeItsHot" class OldDDBObject(object): pass class OldItem(OldDDBObject): # allOldItems is a hack to get around the fact that old databases can have # items that aren't at the top level. In fact, they can be in fairly # crazy places. See bug #2515. So we need to keep track of the items # when we unpickle the objects. allOldItems = set() def __setstate__(self, state): (version, data) = state if version == 0: data['pendingManualDL'] = False if not data.has_key('linkNumber'): data['linkNumber'] = 0 version += 1 if version == 1: data['keep'] = False data['pendingReason'] = "" version += 1 if version == 2: data['creationTime'] = datetime.now() version += 1 assert(version == 3) data['startingDownload'] = False self.__dict__ = data # Older versions of the database allowed Feed Implementations # to act as feeds. If that's the case, change feed attribute # to contain the actual feed. # NOTE: This assumes that the feed object is decoded # before its items. That appears to be generally true if not issubclass(self.feed.__class__, OldDDBObject): try: self.feed = self.feed.ufeed except: self.__class__ = DropItLikeItsHot if self.__class__ is OldFileItem: self.__class__ = DropItLikeItsHot self.iconCache = None if not 'downloadedTime' in data: self.downloadedTime = None OldItem.allOldItems.add(self) class OldFileItem(OldItem): pass class OldFeed(OldDDBObject): def __setstate__(self,state): (version, data) = state if version == 0: version += 1 if version == 1: data['thumbURL'] = defaultFeedIconURL() version += 1 if version == 2: data['lastViewed'] = datetime.min data['unwatched'] = 0 data['available'] = 0 version += 1 assert(version == 3) data['updating'] = False if not data.has_key('initiallyAutoDownloadable'): data['initiallyAutoDownloadable'] = True self.__dict__ = data # This object is useless without a FeedImpl associated with it if not data.has_key('actualFeed'): self.__class__ = DropItLikeItsHot self.iconCache = None class OldFolder(OldDDBObject): pass class OldHTTPAuthPassword(OldDDBObject): pass class OldFeedImpl: def __setstate__(self, data): self.__dict__ = data if 'expireTime' not in data: self.expireTime = None # Some feeds had invalid updating freq. Catch that error here, so we # don't lose the dabatase when we restore it. try: self.updateFreq = int(self.updateFreq) except ValueError: self.updateFreq = -1 class OldScraperFeedImpl(OldFeedImpl): def __setstate__(self,state): (version, data) = state assert(version == 0) data['updating'] = False data['tempHistory'] = {} OldFeedImpl.__setstate__(self, data) class OldRSSFeedImpl(OldFeedImpl): def __setstate__(self,state): (version, data) = state assert(version == 0) data['updating'] = False OldFeedImpl.__setstate__(self, data) class OldSearchFeedImpl(OldRSSFeedImpl): pass class OldSearchDownloadsFeedImpl(OldFeedImpl): pass class OldDirectoryFeedImpl(OldFeedImpl): def __setstate__(self,state): (version, data) = state assert(version == 0) data['updating'] = False if not data.has_key('initialUpdate'): data['initialUpdate'] = False OldFeedImpl.__setstate__(self, data) class OldRemoteDownloader(OldDDBObject): def __setstate__(self,state): (version, data) = state self.__dict__ = copy(data) self.status = {} for key in ('startTime', 'endTime', 'filename', 'state', 'currentSize', 'totalSize', 'reasonFailed'): self.status[key] = self.__dict__[key] del self.__dict__[key] # force the download daemon to create a new downloader object. self.dlid = 'noid' class OldChannelGuide(OldDDBObject): def __setstate__(self,state): (version, data) = state if version == 0: self.sawIntro = data['viewed'] self.cachedGuideBody = None self.loadedThisSession = False self.cond = threading.Condition() else: assert(version == 1) self.__dict__ = data self.cond = threading.Condition() self.loadedThisSession = False if not data.has_key('id'): self.__class__ = DropItLikeItsHot # No need to load a fresh channel guide here. class OldMetainfo(OldDDBObject): pass fakeClasses = { 'item.Item': OldItem, 'item.FileItem': OldFileItem, 'feed.Feed': OldFeed, 'feed.FeedImpl': OldFeedImpl, 'feed.RSSFeedImpl': OldRSSFeedImpl, 'feed.ScraperFeedImpl': OldScraperFeedImpl, 'feed.SearchFeedImpl': OldSearchFeedImpl, 'feed.DirectoryFeedImpl': OldDirectoryFeedImpl, 'feed.SearchDownloadsFeedImpl': OldSearchDownloadsFeedImpl, 'downloader.HTTPAuthPassword': OldHTTPAuthPassword, 'downloader.RemoteDownloader': OldRemoteDownloader, 'guide.ChannelGuide': OldChannelGuide, # Drop these classes like they're hot! # # YahooSearchFeedImpl is a leftover class that we don't use anymore. # # The HTTPDownloader and BTDownloader classes were removed in 0.8.2. The # cleanest way to handle them is to just drop them. If the user still has # these in their database, too bad. BTDownloaders may contain BTDisplay # and BitTorrent.ConvertedMetainfo.ConvertedMetainfo objects, drop those # too. # # We use BitTornado now, so drop the metainfo... We should recreate it # after the upgrade. # # DownloaderFactory and StaticTab shouldn't be pickled, but I've seen # databases where it is. # # We used to have classes called RSSFeed, ScraperFeed, etc. Now we have # the Feed class which contains a FeedImpl subclass. Since this only # happens on really old databases, we should just drop the old ones. 'BitTorrent.ConvertedMetainfo.ConvertedMetainfo': DropItLikeItsHot, 'downloader.DownloaderFactory': DropItLikeItsHot, 'app.StaticTab': DropItLikeItsHot, 'feed.YahooSearchFeedImpl': DropItLikeItsHot, 'downloader.BTDownloader': DropItLikeItsHot, 'downloader.BTDisplay': DropItLikeItsHot, 'downloader.HTTPDownloader': DropItLikeItsHot, 'scheduler.ScheduleEvent': DropItLikeItsHot, 'feed.UniversalFeed' : DropItLikeItsHot, 'feed.RSSFeed': DropItLikeItsHot, 'feed.ScraperFeed': DropItLikeItsHot, 'feed.SearchFeed': DropItLikeItsHot, 'feed.DirectoryFeed': DropItLikeItsHot, 'feed.SearchDownloadsFeed': DropItLikeItsHot, } class FakeClassUnpickler(pickle.Unpickler): unpickleNormallyWhitelist = [ 'datetime.datetime', 'datetime.timedelta', 'time.struct_time', 'feedparser.FeedParserDict', '__builtin__.unicode', ] def find_class(self, module, name): fullyQualifiedName = "%s.%s" % (module, name) if fullyQualifiedName in fakeClasses: return fakeClasses[fullyQualifiedName] elif fullyQualifiedName in self.unpickleNormallyWhitelist: return pickle.Unpickler.find_class(self, module, name) else: raise ValueError("Unrecognized class: %s" % fullyQualifiedName) class IconCache: # We need to define this class for the ItemSchema. In practice we will # always use None instead of one of these objects. pass ######################### STAGE 2 helpers ############################# class DDBObjectSchema(ObjectSchema): klass = OldDDBObject classString = 'ddb-object' fields = [ ('id', SchemaInt()) ] # Unlike the SchemaString in schema.py, this allows binary strings or # unicode strings class SchemaString(SchemaSimpleItem): def validate(self, data): super(SchemaSimpleItem, self).validate(data) self.validateTypes(data, (unicode, str)) # Unlike the simple container in schema.py, this allows binary strings class SchemaSimpleContainer(SchemaSimpleItem): """Allows nested dicts, lists and tuples, however the only thing they can store are simple objects. This currently includes bools, ints, longs, floats, strings, unicode, None, datetime and struct_time objects. """ def validate(self, data): super(SchemaSimpleContainer, self).validate(data) self.validateTypes(data, (dict, list, tuple)) self.memory = set() toValidate = LinkedList() while data: if id(data) in self.memory: return else: self.memory.add(id(data)) if isinstance(data, list) or isinstance(data, tuple): for item in data: toValidate.append(item) elif isinstance(data, dict): for key, value in data.items(): self.validateTypes(key, [bool, int, long, float, unicode, str, NoneType, datetime, time.struct_time]) toValidate.append(value) else: self.validateTypes(data, [bool, int, long, float, unicode,str, NoneType, datetime, time.struct_time]) try: data = toValidate.pop() except: data = None class ItemSchema(DDBObjectSchema): klass = OldItem classString = 'item' fields = DDBObjectSchema.fields + [ ('feed', SchemaObject(OldFeed)), ('seen', SchemaBool()), ('downloaders', SchemaList(SchemaObject(OldRemoteDownloader))), ('autoDownloaded', SchemaBool()), ('startingDownload', SchemaBool()), ('lastDownloadFailed', SchemaBool()), ('pendingManualDL', SchemaBool()), ('pendingReason', SchemaString()), ('entry', SchemaSimpleContainer()), ('expired', SchemaBool()), ('keep', SchemaBool()), ('creationTime', SchemaDateTime()), ('linkNumber', SchemaInt(noneOk=True)), ('iconCache', SchemaObject(IconCache, noneOk=True)), ('downloadedTime', SchemaDateTime(noneOk=True)), ] class FileItemSchema(ItemSchema): klass = OldFileItem classString = 'file-item' fields = ItemSchema.fields + [ ('filename', SchemaString()), ] class FeedSchema(DDBObjectSchema): klass = OldFeed classString = 'feed' fields = DDBObjectSchema.fields + [ ('origURL', SchemaString()), ('errorState', SchemaBool()), ('initiallyAutoDownloadable', SchemaBool()), ('loading', SchemaBool()), ('actualFeed', SchemaObject(OldFeedImpl)), ('iconCache', SchemaObject(IconCache, noneOk=True)), ] class FeedImplSchema(ObjectSchema): klass = OldFeedImpl classString = 'field-impl' fields = [ ('available', SchemaInt()), ('unwatched', SchemaInt()), ('url', SchemaString()), ('ufeed', SchemaObject(OldFeed)), ('items', SchemaList(SchemaObject(OldItem))), ('title', SchemaString()), ('created', SchemaDateTime()), ('autoDownloadable', SchemaBool()), ('startfrom', SchemaDateTime()), ('getEverything', SchemaBool()), ('maxNew', SchemaInt()), ('fallBehind', SchemaInt()), ('expire', SchemaString()), ('visible', SchemaBool()), ('updating', SchemaBool()), ('lastViewed', SchemaDateTime()), ('thumbURL', SchemaString()), ('updateFreq', SchemaInt()), ('expireTime', SchemaTimeDelta(noneOk=True)), ] class RSSFeedImplSchema(FeedImplSchema): klass = OldRSSFeedImpl classString = 'rss-feed-impl' fields = FeedImplSchema.fields + [ ('initialHTML', SchemaString(noneOk=True)), ('etag', SchemaString(noneOk=True)), ('modified', SchemaString(noneOk=True)), ] class ScraperFeedImplSchema(FeedImplSchema): klass = OldScraperFeedImpl classString = 'scraper-feed-impl' fields = FeedImplSchema.fields + [ ('initialHTML', SchemaString(noneOk=True)), ('initialCharset', SchemaString(noneOk=True)), ('linkHistory', SchemaSimpleContainer()), ] class SearchFeedImplSchema(FeedImplSchema): klass = OldSearchFeedImpl classString = 'search-feed-impl' fields = FeedImplSchema.fields + [ ('searching', SchemaBool()), ('lastEngine', SchemaString()), ('lastQuery', SchemaString()), ] class DirectoryFeedImplSchema(FeedImplSchema): klass = OldDirectoryFeedImpl classString = 'directory-feed-impl' # DirectoryFeedImpl doesn't have any addition fields over FeedImpl class SearchDownloadsFeedImplSchema(FeedImplSchema): klass = OldSearchDownloadsFeedImpl classString = 'search-downloads-feed-impl' # SearchDownloadsFeedImpl doesn't have any addition fields over FeedImpl class RemoteDownloaderSchema(DDBObjectSchema): klass = OldRemoteDownloader classString = 'remote-downloader' fields = DDBObjectSchema.fields + [ ('url', SchemaString()), ('itemList', SchemaList(SchemaObject(OldItem))), ('dlid', SchemaString()), ('contentType', SchemaString(noneOk=True)), ('status', SchemaSimpleContainer()), ] class HTTPAuthPasswordSchema(DDBObjectSchema): klass = OldHTTPAuthPassword classString = 'http-auth-password' fields = DDBObjectSchema.fields + [ ('username', SchemaString()), ('password', SchemaString()), ('host', SchemaString()), ('realm', SchemaString()), ('path', SchemaString()), ('authScheme', SchemaString()), ] class FolderSchema(DDBObjectSchema): klass = OldFolder classString = 'folder' fields = DDBObjectSchema.fields + [ ('feeds', SchemaList(SchemaInt())), ('title', SchemaString()), ] class ChannelGuideSchema(DDBObjectSchema): klass = OldChannelGuide classString = 'channel-guide' fields = DDBObjectSchema.fields + [ ('sawIntro', SchemaBool()), ('cachedGuideBody', SchemaString(noneOk=True)), ('loadedThisSession', SchemaBool()), ] objectSchemas = [ DDBObjectSchema, ItemSchema, FileItemSchema, FeedSchema, FeedImplSchema, RSSFeedImplSchema, ScraperFeedImplSchema, SearchFeedImplSchema, DirectoryFeedImplSchema, SearchDownloadsFeedImplSchema, RemoteDownloaderSchema, HTTPAuthPasswordSchema, FolderSchema, ChannelGuideSchema, ] def convertOldDatabase(databasePath): OldItem.allOldItems = set() shutil.copyfile(databasePath, databasePath + '.old') f = open(databasePath, 'rb') p = FakeClassUnpickler(f) data = p.load() if type(data) == types.ListType: # version 0 database objects = data else: # version 1 database (version, objects) = data # Objects used to be stored as (object, object) tuples. Remove the dup objects = [o[0] for o in objects] # drop any top-level DropItLikeItsHot instances objects = [o for o in objects if not hasattr(o, '__DropMeLikeItsHot')] # Set obj.id for any objects missing it idMissing = set() lastId = 0 for o in objects: if hasattr(o, 'id'): if o.id > lastId: lastId = o.id else: idMissing.add(o) for o in idMissing: lastId += 1 o.id = lastId # drop any downloaders that are referenced by items def dropItFilter(obj): return not hasattr(obj, '__DropMeLikeItsHot') for i in OldItem.allOldItems: i.downloaders = filter(dropItFilter, i.downloaders) storedatabase.saveObjectList(objects, databasePath, objectSchemas=objectSchemas, version=6)