home *** CD-ROM | disk | FTP | other *** search
/ Personal Computer World 2008 February / PCWFEB08.iso / Software / Freeware / Miro 1.0 / Miro_Installer.exe / xulrunner / python / httpclient.py < prev    next >
Encoding:
Python Source  |  2007-11-12  |  73.7 KB  |  1,903 lines

  1. # Miro - an RSS based video player application
  2. # Copyright (C) 2005-2007 Participatory Culture Foundation
  3. #
  4. # This program is free software; you can redistribute it and/or modify
  5. # it under the terms of the GNU General Public License as published by
  6. # the Free Software Foundation; either version 2 of the License, or
  7. # (at your option) any later version.
  8. #
  9. # This program is distributed in the hope that it will be useful,
  10. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12. # GNU General Public License for more details.
  13. #
  14. # You should have received a copy of the GNU General Public License
  15. # along with this program; if not, write to the Free Software
  16. # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
  17.  
  18. """httpclient.py 
  19.  
  20. Implements an HTTP client.  The main way that this module is used is the
  21. grabURL function that's an asynchronous version of our old grabURL.
  22.  
  23. A lot of the code here comes from inspection of the httplib standard module.
  24. Some of it was taken more-or-less directly from there.  I (Ben Dean-Kawamura)
  25. believe our clients follow the HTTP 1.1 spec completely, I used RFC2616 as a
  26. reference (http://www.w3.org/Protocols/rfc2616/rfc2616.html).
  27. """
  28.  
  29. import errno
  30. import logging
  31. import re
  32. import socket
  33. import traceback
  34. from urlparse import urljoin
  35. from gtcache import gettext as _
  36.  
  37. from base64 import b64encode
  38.  
  39. from clock import clock
  40.  
  41. import httpauth
  42. import config
  43. import prefs
  44. from download_utils import URIPattern, cleanFilename, parseURL, defaultPort, getFileURLPath, filenameFromURL
  45. from xhtmltools import URLEncodeDict, multipartEncode
  46. import eventloop
  47. import util
  48. import sys
  49. import time
  50. import urllib
  51.  
# Module-level switches and timeouts.
# PIPELINING_ENABLED is not referenced by the code visible in this part of
# the file; presumably it toggles HTTP request pipelining -- TODO confirm.
PIPELINING_ENABLED = True
# Read timeout AsyncSocket.onReadTimeout applies once the socket has
# received some data (presumably seconds -- confirm against
# eventloop.addTimeout).
SOCKET_READ_TIMEOUT = 60
# Read timeout applied until the first data arrives on a socket.
SOCKET_INITIAL_READ_TIMEOUT = 30
# How long AsyncSocket.openConnection waits for the connect to finish before
# reporting a ConnectionTimeout.
SOCKET_CONNECT_TIMEOUT = 15
  56.  
  57. class NetworkError(Exception):
  58.     """Base class for all errors that will be passed to errbacks from getURL
  59.     and friends.  NetworkErrors can be display in 2 ways:
  60.  
  61.     getFriendlyDescription() -- short, newbie friendly description 
  62.     getLongDescription() -- detailed description
  63.     """
  64.  
  65.     def __init__(self, shortDescription, longDescription=None):
  66.         if longDescription is None:
  67.             longDescription = shortDescription
  68.         self.friendlyDescription = _("Error: %s") % shortDescription
  69.         self.longDescription = longDescription
  70.  
  71.     def getFriendlyDescription(self):
  72.         return self.friendlyDescription
  73.  
  74.     def getLongDescription(self):
  75.         return self.longDescription
  76.  
  77.     def __str__(self):
  78.         return "%s: %s -- %s" % (self.__class__,
  79.                 self.getFriendlyDescription(), self.getLongDescription())
  80.  
  81. class ConnectionError(NetworkError):
  82.     def __init__(self, errorMessage):
  83.         self.friendlyDescription = _("Can't connect")
  84.         self.longDescription = _("Connection Error: %s") % \
  85.                                util.stringify(errorMessage, "replace")
  86.  
  87. class SSLConnectionError(ConnectionError):
  88.     def __init__(self):
  89.         self.friendlyDescription = _("Can't connect")
  90.         self.longDescription = _("SSL connection error")
  91.  
  92. class HTTPError(NetworkError):
  93.     def __init__(self, longDescription):
  94.         NetworkError.__init__(self, _("HTTP error"), longDescription)
  95. class BadStatusLine(HTTPError):
  96.     def __init__(self, line):
  97.         HTTPError.__init__(self, _("Bad Status Line: %s") % line)
  98. class BadHeaderLine(HTTPError):
  99.     def __init__(self, line):
  100.         HTTPError.__init__(self, _("Bad Header Line: %s") % line)
  101. class BadChunkSize(HTTPError):
  102.     def __init__(self, line):
  103.         HTTPError.__init__(self, _("Bad Chunk size: %s") % line)
  104. class CRLFExpected(HTTPError):
  105.     def __init__(self, crlf):
  106.         HTTPError.__init__(self, _("Expected CRLF got: %r") % crlf)
  107. class ServerClosedConnection(HTTPError):
  108.     def __init__(self, host):
  109.         HTTPError.__init__(self, _('%s closed connection') % host)
  110.  
  111. class UnexpectedStatusCode(HTTPError):
  112.     def __init__(self, code):
  113.         if code == 404:
  114.             self.friendlyDescription = _("File not found")
  115.             self.longDescription = _("Got 404 status code")
  116.         else:
  117.             HTTPError.__init__(self, _("Bad Status Code: %s") % code)
  118.  
  119. class AuthorizationFailed(NetworkError):
  120.     def __init__(self):
  121.         NetworkError.__init__(self, _("Authorization failed"))
  122.  
  123. class PipelinedRequestNeverStarted(NetworkError):
  124.     # User should never see this one
  125.     def __init__(self):
  126.         NetworkError.__init__(self, _("Internal Error"),
  127.                 _("Pipeline request never started"))
  128.  
  129. class ConnectionTimeout(NetworkError):
  130.     def __init__(self, host):
  131.         NetworkError.__init__(self, _('Timeout'),
  132.                 _('Connection to %s timed out') % host)
  133.  
  134. class MalformedURL(NetworkError):
  135.     def __init__(self, url):
  136.         NetworkError.__init__(self, _('Invalid URL'),
  137.                 _('"%s" is not a valid URL') % url)
  138.  
  139. class FileURLNotFoundError(NetworkError):
  140.     """A file: URL doesn't exist"""
  141.     def __init__(self, path):
  142.         NetworkError.__init__(self, _('File not found'),
  143.             _('The file: "%s" doesn\'t exist') % path)
  144.  
  145. class FileURLReadError(NetworkError):
  146.     def __init__(self, path):
  147.         NetworkError.__init__(self, _('Read error'),
  148.             _('Error while reading from "%s"') % path)
  149.  
  150. def trapCall(object, function, *args, **kwargs):
  151.     """Convenience function do a util.trapCall, where when = 'While talking to
  152.     the network'
  153.     """
  154.     return util.timeTrapCall("Calling %s on %s" % (function, object), function, *args, **kwargs)
  155.  
  156.  
# Stand-in timestamp (noon, 12 July 2030, interpreted as local time by
# time.mktime) that get_cookie_expiration_date() returns when a cookie's
# expiration date overflows time.mktime.
DATEINFUTURE = time.mktime( (2030, 7, 12, 12, 0, 0, 4, 193, -1) )
  158.  
  159. def get_cookie_expiration_date(val):
  160.     """Tries a bunch of possible cookie expiration date formats
  161.     until it finds the magic one (or doesn't and returns 0).
  162.     """
  163.     fmts = ( '%a, %d %b %Y %H:%M:%S %Z',
  164.              '%a, %d %b %y %H:%M:%S %Z',
  165.              '%a, %d-%b-%Y %H:%M:%S %Z',
  166.              '%a, %d-%b-%y %H:%M:%S %Z' )
  167.     
  168.     for fmt in fmts:
  169.         try:
  170.             return time.mktime(time.strptime(val, fmt))
  171.         except OverflowError, oe:
  172.             # an overflow error means the cookie expiration is far in the
  173.             # future.  so we return a date that's not so far in the
  174.             # future.
  175.             return DATEINFUTURE
  176.         except ValueError, ve:
  177.             pass
  178.  
  179.     print "DTV: Warning: Can't process cookie expiration: '%s'" % val
  180.     return 0
  181.  
  182.  
  183. class NetworkBuffer(object):
  184.     """Responsible for storing incomming network data and doing some basic
  185.     parsing of it.  I think this is about as fast as we can do things in pure
  186.     python, someday we may want to make it C...
  187.     """
  188.     def __init__(self):
  189.         self.chunks = []
  190.         self.length = 0
  191.  
  192.     def addData(self, data):
  193.         self.chunks.append(data)
  194.         self.length += len(data)
  195.  
  196.     def _mergeChunks(self):
  197.         self.chunks = [''.join(self.chunks)]
  198.  
  199.     def read(self, size=None):
  200.         """Read at most size bytes from the data that has been added to the
  201.         buffer.  """
  202.  
  203.         self._mergeChunks()
  204.         if size is not None:
  205.             rv = self.chunks[0][:size]
  206.             self.chunks[0] = self.chunks[0][len(rv):]
  207.         else:
  208.             rv = self.chunks[0]
  209.             self.chunks = []
  210.         self.length -= len(rv)
  211.         return rv
  212.  
  213.     def readline(self):
  214.         """Like a file readline, with several difference:  
  215.         * If there isn't a full line ready to be read we return None.  
  216.         * Doesn't include the trailing line separator.
  217.         * Both "\r\n" and "\n" act as a line ender
  218.         """
  219.  
  220.         self._mergeChunks()
  221.         split = self.chunks[0].split("\n", 1)
  222.         if len(split) == 2:
  223.             self.chunks[0] = split[1]
  224.             self.length = len(self.chunks[0])
  225.             if split[0].endswith("\r"):
  226.                 return split[0][:-1]
  227.             else:
  228.                 return split[0]
  229.         else:
  230.             return None
  231.  
  232.     def unread(self, data):
  233.         """Put back read data.  This make is like the data was never read at
  234.         all.
  235.         """
  236.         self.chunks.insert(0, data)
  237.         self.length += len(data)
  238.  
  239.     def getValue(self):
  240.         self._mergeChunks()
  241.         return self.chunks[0]
  242.  
  243. class _Packet(object):
  244.     """A packet of data for the AsyncSocket class
  245.     """
  246.     def __init__ (self, data, callback = None):
  247.         self.data = data
  248.         self.callback = callback
  249.  
  250. class AsyncSocket(object):
  251.     """Socket class that uses our new fangled asynchronous eventloop
  252.     module.
  253.     """
  254.  
  255.     MEMORY_ERROR_LIMIT = 5
  256.  
  257.     def __init__(self, closeCallback=None):
  258.         """Create an AsyncSocket.  If closeCallback is given, it will be
  259.         called if we detect that the socket has been closed durring a
  260.         read/write operation.  The arguments will be the AsyncSocket object
  261.         and either socket.SHUT_RD or socket.SHUT_WR.
  262.         """
  263.         self.toSend = []
  264.         self.readSize = 4096
  265.         self.socket = None
  266.         self.readCallback = None
  267.         self.closeCallback = closeCallback
  268.         self.readTimeout = None
  269.         self.timedOut = False
  270.         self.connectionErrback = None
  271.         self.disableReadTimeout = False
  272.         self.readSomeData = False
  273.         self.name = ""
  274.         self.lastClock = None
  275.         self.memoryErrors = 0
  276.  
  277.     def __str__(self):
  278.         if self.name:
  279.             return "%s: %s" % (type(self).__name__, self.name)
  280.         else:
  281.             return "Unknown %s" % (type(self).__name__,)
  282.  
  283.     # The complication in the timeout code is because creating and
  284.     # cancelling a timeout costs some memory (timeout is in memory
  285.     # until it goes off, even if cancelled.)
  286.     def startReadTimeout(self):
  287.         if self.disableReadTimeout:
  288.             return
  289.         self.lastClock = clock()
  290.         if self.readTimeout is not None:
  291.             return
  292.         self.readTimeout = eventloop.addTimeout(SOCKET_INITIAL_READ_TIMEOUT, self.onReadTimeout,
  293.                 "AsyncSocket.onReadTimeout")
  294.  
  295.     def stopReadTimeout(self):
  296.         if self.readTimeout is not None:
  297.             self.readTimeout.cancel()
  298.             self.readTimeout = None
  299.  
  300.     def openConnection(self, host, port, callback, errback, disableReadTimeout = None):
  301.         """Open a connection.  On success, callback will be called with this
  302.         object.
  303.         """
  304.         if disableReadTimeout is not None:
  305.             self.disableReadTimeout = disableReadTimeout
  306.         self.name = "Outgoing %s:%s" % (host, port)
  307.  
  308.         try:
  309.             self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
  310.         except socket.error, e:
  311.             trapCall(self, errback, ConnectionError(e[1]))
  312.             return
  313.         self.socket.setblocking(0)
  314.         self.connectionErrback = errback
  315.         def handleGetHostByNameException(e):
  316.             trapCall(self, errback, ConnectionError(e[1]))
  317.         def onAddressLookup(address):
  318.             if self.socket is None:
  319.                 # the connection was closed while we were calling gethostbyname
  320.                 return
  321.             try:
  322.                 rv = self.socket.connect_ex((address, port))
  323.             except socket.gaierror:
  324.                 trapCall(self, errback, ConnectionError('gaierror'))
  325.                 return
  326.             if rv in (0, errno.EINPROGRESS, errno.EWOULDBLOCK):
  327.                 eventloop.addWriteCallback(self.socket, onWriteReady)
  328.                 self.socketConnectTimeout = eventloop.addTimeout(
  329.                         SOCKET_CONNECT_TIMEOUT, onWriteTimeout,
  330.                         "socket connect timeout")
  331.             else:
  332.                 msg = errno.errorcode[rv]
  333.                 trapCall(self, errback, ConnectionError(msg))
  334.         def onWriteReady():
  335.             eventloop.removeWriteCallback(self.socket)
  336.             self.socketConnectTimeout.cancel()
  337.             rv = self.socket.getsockopt(socket.SOL_SOCKET, socket.SO_ERROR)
  338.             if rv == 0:
  339.                 trapCall(self, callback, self)
  340.             else:
  341.                 msg = errno.errorcode.get(rv, _('Unknown Error code'))
  342.                 trapCall(self, errback, ConnectionError(msg))
  343.             self.connectionErrback = None
  344.         def onWriteTimeout():
  345.             eventloop.removeWriteCallback(self.socket)
  346.             trapCall(self, errback, ConnectionTimeout(host))
  347.             self.connectionErrback = None
  348.         eventloop.callInThread(onAddressLookup, handleGetHostByNameException,
  349.                 socket.gethostbyname, "getHostByName - %s" % host, host)
  350.  
  351.     def acceptConnection(self, host, port, callback, errback):
  352.         def finishAccept():
  353.             eventloop.removeReadCallback(self.socket)
  354.             (self.socket, addr) = self.socket.accept()
  355.             trapCall(self, callback, self)
  356.             self.connectionErrback = None
  357.  
  358.         self.name = "Incoming %s:%s" % (host, port)
  359.         self.connectionErrback = errback
  360.         self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
  361.         self.socket.bind( (host, port) )
  362.         (self.addr, self.port) = self.socket.getsockname()
  363.         self.socket.listen(63)
  364.         eventloop.addReadCallback(self.socket, finishAccept)
  365.  
  366.     def closeConnection(self):
  367.         if self.isOpen():
  368.             eventloop.stopHandlingSocket(self.socket)
  369.             self.stopReadTimeout()
  370.             self.socket.close()
  371.             self.socket = None
  372.             if self.connectionErrback is not None:
  373.                 error = NetworkError(_("Connection closed"))
  374.                 trapCall(self, self.connectionErrback, error)
  375.                 self.connectionErrback = None
  376.  
  377.     def isOpen(self):
  378.         return self.socket is not None
  379.  
  380.     def sendData(self, data, callback = None):
  381.         """Send data out to the socket when it becomes ready.
  382.         
  383.         NOTE: currently we have no way of detecting when the data gets sent
  384.         out, or if errors happen.
  385.         """
  386.  
  387.         if not self.isOpen():
  388.             raise ValueError("Socket not connected")
  389.         self.toSend.append(_Packet(data, callback))
  390.         eventloop.addWriteCallback(self.socket, self.onWriteReady)
  391.  
  392.     def startReading(self, readCallback):
  393.         """Start reading from the socket.  When data becomes available it will
  394.         be passed to readCallback.  If there is already a read callback, it
  395.         will be replaced.
  396.         """
  397.  
  398.         if not self.isOpen():
  399.             raise ValueError("Socket not connected")
  400.         self.readCallback = readCallback
  401.         eventloop.addReadCallback(self.socket, self.onReadReady)
  402.         self.startReadTimeout()
  403.  
  404.     def stopReading(self):
  405.         """Stop reading from the socket."""
  406.  
  407.         if not self.isOpen():
  408.             raise ValueError("Socket not connected")
  409.         self.readCallback = None
  410.         eventloop.removeReadCallback(self.socket)
  411.         self.stopReadTimeout()
  412.  
  413.     def onReadTimeout(self):
  414.         if self.readSomeData:
  415.             timeout = SOCKET_READ_TIMEOUT
  416.         else:
  417.             timeout = SOCKET_INITIAL_READ_TIMEOUT
  418.  
  419.         if clock() < self.lastClock + timeout:
  420.             self.readTimeout = eventloop.addTimeout(self.lastClock + timeout - clock(), self.onReadTimeout,
  421.                 "AsyncSocket.onReadTimeout")
  422.         else:
  423.             self.readTimeout = None
  424.             self.timedOut = True
  425.             self.handleEarlyClose('read')
  426.  
  427.     def handleSocketError(self, code, msg, operation):
  428.         if code in (errno.EWOULDBLOCK, errno.EINTR):
  429.             return
  430.  
  431.         if operation == "write":
  432.             expectedErrors = (errno.EPIPE, errno.ECONNRESET)
  433.         else:
  434.             expectedErrors = (errno.ECONNREFUSED, errno.ECONNRESET)
  435.         if code not in expectedErrors:
  436.             print "WARNING, got unexpected error during %s" % operation
  437.             print "%s: %s" % (errno.errorcode.get(code), msg)
  438.         self.handleEarlyClose(operation)
  439.  
  440.     def onWriteReady(self):
  441.         try:
  442.             if len(self.toSend) > 0:
  443.                 sent = self.socket.send(self.toSend[0].data)
  444.             else:
  445.                 sent = 0
  446.         except socket.error, (code, msg):
  447.             self.handleSocketError(code, msg, "write")
  448.         else:
  449.             self.handleSentData(sent)
  450.  
  451.     def handleSentData(self, sent):
  452.         if len(self.toSend) > 0:
  453.             self.toSend[0].data = self.toSend[0].data[sent:]
  454.             if len(self.toSend[0].data) == 0:
  455.                 if self.toSend[0].callback:
  456.                     self.toSend[0].callback()
  457.                 self.toSend = self.toSend[1:]
  458.         if len(self.toSend) == 0:
  459.             eventloop.removeWriteCallback(self.socket)
  460.  
  461.     def onReadReady(self):
  462.         try:
  463.             data = self.socket.recv(self.readSize)
  464.         except socket.error, (code, msg):
  465.             self.handleSocketError(code, msg, "read")
  466.         except MemoryError:
  467.             # This happens because of a windows bug in the socket code (see
  468.             # #4373).  Let's hope that things clear themselves up next time we
  469.             # read.
  470.             self.memoryErrors += 1
  471.             if self.memoryErrors > self.MEMORY_ERROR_LIMIT:
  472.                 print "ERROR: Too many MemoryErrors on %s" % self
  473.                 self.handleEarlyClose('read')
  474.             else:
  475.                 print "WARNING: Memory error while reading from %s" % self
  476.         else:
  477.             self.memoryErrors = 0
  478.             self.handleReadData(data)
  479.  
  480.     def handleReadData(self, data):
  481.         self.startReadTimeout()
  482.         if data == '':
  483.             if self.closeCallback:
  484.                 trapCall(self, self.closeCallback, self, socket.SHUT_RD)
  485.         else:
  486.             self.readSomeData = True
  487.             trapCall(self, self.readCallback, data)
  488.  
  489.     def handleEarlyClose(self, operation):
  490.         self.closeConnection()
  491.         if self.closeCallback:
  492.             if operation == 'read':
  493.                 type = socket.SHUT_RD
  494.             else:
  495.                 type = socket.SHUT_WR
  496.             trapCall(self, self.closeCallback, self, type)
  497.  
  498. class AsyncSSLStream(AsyncSocket):
  499.     def __init__(self, closeCallback=None):
  500.         super(AsyncSSLStream, self).__init__(closeCallback)
  501.         self.interruptedOperation = None
  502.  
  503.     def openConnection(self, host, port, callback, errback, disableReadTimeout = None):
  504.         def onSocketOpen(self):
  505.             self.socket.setblocking(1)
  506.             eventloop.callInThread(onSSLOpen, handleSSLError, socket.ssl,
  507.                                    "AsyncSSL onSocketOpen()",
  508.                                    self.socket)
  509.         def onSSLOpen(ssl):
  510.             if self.socket is None:
  511.                 # the connection was closed while we were calling socket.ssl
  512.                 return
  513.             self.socket.setblocking(0)
  514.             self.ssl = ssl
  515.             # finally we can call the actuall callback
  516.             callback(self)
  517.         def handleSSLError(error):
  518.             errback(SSLConnectionError())
  519.         super(AsyncSSLStream, self).openConnection(host, port, onSocketOpen,
  520.                 errback, disableReadTimeout)
  521.  
  522.     def resumeNormalCallbacks(self):
  523.         if self.readCallback is not None:
  524.             eventloop.addReadCallback(self.socket, self.onReadReady)
  525.         if len(self.toSend) != 0:
  526.             eventloop.addWriteCallback(self.socket, self.onWriteReady)
  527.  
  528.     def handleSocketError(self, code, msg, operation):
  529.         if code in (socket.SSL_ERROR_WANT_READ, socket.SSL_ERROR_WANT_WRITE):
  530.             if self.interruptedOperation is None:
  531.                 self.interruptedOperation = operation
  532.             elif self.interruptedOperation != operation:
  533.                 util.failed("When talking to the network", 
  534.                 details="socket error for the wrong SSL operation")
  535.                 self.closeConnection()
  536.                 return
  537.             eventloop.stopHandlingSocket(self.socket)
  538.             if code == socket.SSL_ERROR_WANT_READ:
  539.                 eventloop.addReadCallback(self.socket, self.onReadReady)
  540.             else:
  541.                 eventloop.addWriteCallback(self.socket, self.onWriteReady)
  542.         elif code in (socket.SSL_ERROR_ZERO_RETURN, socket.SSL_ERROR_SSL,
  543.                 socket.SSL_ERROR_SYSCALL, socket.SSL_ERROR_EOF):
  544.             self.handleEarlyClose(operation)
  545.         else:
  546.             super(AsyncSSLStream, self).handleSocketError(code, msg,
  547.                     operation)
  548.  
  549.     def onWriteReady(self):
  550.         if self.interruptedOperation == 'read':
  551.             return self.onReadReady()
  552.         try:
  553.             if len(self.toSend) > 0:
  554.                 sent = self.ssl.write(self.toSend[0].data)
  555.             else:
  556.                 sent = 0
  557.         except socket.error, (code, msg):
  558.             self.handleSocketError(code, msg, "write")
  559.         else:
  560.             if self.interruptedOperation == 'write':
  561.                 self.resumeNormalCallbacks()
  562.                 self.interruptedOperation = None
  563.             self.handleSentData(sent)
  564.  
  565.     def onReadReady(self):
  566.         if self.interruptedOperation == 'write':
  567.             return self.onWriteReady()
  568.         try:
  569.             data = self.ssl.read(self.readSize)
  570.         except socket.error, (code, msg):
  571.             self.handleSocketError(code, msg, "read")
  572.         else:
  573.             if self.interruptedOperation == 'read':
  574.                 self.resumeNormalCallbacks()
  575.                 self.interruptedOperation = None
  576.             self.handleReadData(data)
  577.  
  578. class ProxiedAsyncSSLStream(AsyncSSLStream):
  579.     def openConnection(self, host, port, callback, errback, disableReadTimeout):
  580.         def onSocketOpen(self):
  581.             self.socket.setblocking(1)
  582.             eventloop.callInThread(onSSLOpen, handleSSLError, lambda: openProxyConnection(self),
  583.                                    "ProxiedAsyncSSL openProxyConnection()")
  584.         def openProxyConnection(self):
  585.             headers = {'User-Agent': '%s/%s (%s)' % (
  586.                 config.get(prefs.SHORT_APP_NAME),
  587.                 config.get(prefs.APP_VERSION),
  588.                 config.get(prefs.PROJECT_URL)),
  589.                 "Host": host}
  590.             if config.get(prefs.HTTP_PROXY_AUTHORIZATION_ACTIVE):
  591.                 username = config.get(prefs.HTTP_PROXY_AUTHORIZATION_USERNAME)
  592.                 password = config.get(prefs.HTTP_PROXY_AUTHORIZATION_PASSWORD)
  593.                 authString = username+':'+password
  594.                 authString = b64encode(authString)
  595.                 headers['ProxyAuthorization'] = "Basic " + authString
  596.  
  597.             connectString = "CONNECT %s:%d HTTP/1.1\r\n" % (host, port)
  598.             for header, value in headers.items():
  599.                 connectString += ('%s: %s\r\n' % (header, value))
  600.             connectString += "\r\n"
  601.  
  602.             try:
  603.                 self.socket.send(connectString)
  604.                 data = ""
  605.                 while (data.find("\r\n\r\n") == -1):
  606.                     data += self.socket.recv(1)
  607.                 data = data.split("\r\n")
  608.                 if (-1 == data[0].find(' 200 ')):                   
  609.                     eventloop.addIdle(lambda :handleSSLError(NetworkError(data[0])),"Network Error")
  610.                 else:
  611.                     return socket.ssl(self.socket)
  612.             except socket.error, (code, msg):
  613.                 handleSSLError(msg)
  614.  
  615.         def onSSLOpen(ssl):
  616.             if self.socket is None or ssl is None:
  617.                 # the connection was closed while we were calling socket.ssl
  618.                 return
  619.             self.socket.setblocking(0)
  620.             self.ssl = ssl
  621.             # finally we can call the actuall callback
  622.             callback(self)
  623.         def handleSSLError(error):
  624.             errback(SSLConnectionError())
  625.         proxy_host = config.get(prefs.HTTP_PROXY_HOST)
  626.         proxy_port = config.get(prefs.HTTP_PROXY_PORT)
  627.         AsyncSocket.openConnection(self, proxy_host, proxy_port, onSocketOpen,
  628.                 errback, disableReadTimeout)
  629.     
  630.     
  631. class ConnectionHandler(object):
  632.     """Base class to handle asynchronous network streams.  It implements a
  633.     simple state machine to deal with incomming data.
  634.  
  635.     Sending data: Use the sendData() method.
  636.  
  637.     Reading Data: Add entries to the state dictionary, which maps strings to
  638.     methods.  The state methods will be called when there is data available,
  639.     which can be read from the buffer variable.  The states dictionary can
  640.     contain a None value, to signal that the handler isn't interested in
  641.     reading at that point.  Use changeState() to switch states.
  642.  
  643.     Subclasses should override tho the handleClose() method to handle the
  644.     socket closing.
  645.     """
  646.  
  647.     streamFactory = AsyncSocket
  648.  
  649.     def __init__(self):
  650.         self.buffer = NetworkBuffer()
  651.         self.states = {'initializing': None, 'closed': None}
  652.         self.stream = self.streamFactory(closeCallback=self.closeCallback)
  653.         self.changeState('initializing')
  654.         self.name = ""
  655.  
  656.     def __str__(self):
  657.         return "%s -- %s" % (self.__class__, self.state)
  658.  
  659.     def openConnection(self, host, port, callback, errback, disableReadTimeout=None):
  660.         self.name = "Outgoing %s:%s" % (host, port)
  661.         self.host = host
  662.         self.port = port
  663.         def callbackIntercept(asyncSocket):
  664.             if callback:
  665.                 trapCall(self, callback, self)
  666.         self.stream.openConnection(host, port, callbackIntercept, errback, disableReadTimeout)
  667.  
  668.     def closeConnection(self):
  669.         if self.stream.isOpen():
  670.             self.stream.closeConnection()
  671.         self.changeState('closed')
  672.  
  673.     def sendData(self, data, callback = None):
  674.         self.stream.sendData(data, callback)
  675.  
  676.     def changeState(self, newState):
  677.         self.readHandler = self.states[newState]
  678.         self.state = newState
  679.         self.updateReadCallback()
  680.  
  681.     def updateReadCallback(self):
  682.         if self.readHandler is not None:
  683.             self.stream.startReading(self.handleData)
  684.         elif self.stream.isOpen():
  685.             try:
  686.                 self.stream.stopReading()
  687.             except KeyError:
  688.                 pass
  689.  
  690.     def handleData(self, data):
  691.         self.buffer.addData(data)
  692.         lastState = self.state
  693.         self.readHandler()
  694.         # If we switch states, continue processing the buffer.  There may be
  695.         # extra data that the last read handler didn't read in
  696.         while self.readHandler is not None and lastState != self.state:
  697.             lastState = self.state
  698.             self.readHandler()
  699.  
  700.     def closeCallback(self, stream, type):
  701.         self.handleClose(type)
  702.  
  703.     def handleClose(self, type):
  704.         """Handle our stream becoming closed.  Type is either socket.SHUT_RD,
  705.         or socket.SHUT_WR.
  706.         """
  707.         raise NotImplementedError()
  708.  
  709.  
  710. class HTTPConnection(ConnectionHandler):
  711.     scheme = 'http'
  712.  
  713.     def __init__(self, closeCallback=None, readyCallback=None):
  714.         super(HTTPConnection, self).__init__()
  715.         self.shortVersion = 0
  716.         self.states['ready'] = None
  717.         self.states['response-status'] = self.onStatusData
  718.         self.states['response-headers'] = self.onHeaderData
  719.         self.states['response-body'] = self.onBodyData
  720.         self.states['chunk-size'] = self.onChunkSizeData
  721.         self.states['chunk-data'] = self.onChunkData
  722.         self.states['chunk-crlf'] = self.onChunkCRLFData
  723.         self.states['chunk-trailer'] = self.onChunkTrailerData
  724.         self.changeState('ready')
  725.         self.idleSince = clock()
  726.         self.unparsedHeaderLine = ''
  727.         self.pipelinedRequest = None
  728.         self.closeCallback = closeCallback
  729.         self.readyCallback = readyCallback
  730.         self.requestsFinished = 0
  731.         self.bytesRead = 0
  732.         self.sentReadyCallback = False
  733.         self.headerCallback = self.bodyDataCallback = None
  734.  
  735.     def handleData(self, data):
  736.         self.bytesRead += len(data)
  737.         super(HTTPConnection, self).handleData(data)
  738.  
  739.     def closeConnection(self):
  740.         super(HTTPConnection, self).closeConnection()
  741.         if self.closeCallback is not None:
  742.             self.closeCallback(self)
  743.             self.closeCallback = None
  744.         self.checkPipelineNotStarted()
  745.  
  746.     def checkPipelineNotStarted(self):
  747.         """Call this when the connection is closed by Democracy or the other
  748.         side.  It will check if we have an unstarted pipeline request and 
  749.         send it the PipelinedRequestNeverStarted error
  750.         """
  751.  
  752.         if self.pipelinedRequest is not None:
  753.             errback = self.pipelinedRequest[1]
  754.             trapCall(self, errback, PipelinedRequestNeverStarted())
  755.             self.pipelinedRequest = None
  756.  
  757.     def canSendRequest(self):
  758.         return (self.state == 'ready' or 
  759.                 (self.state != 'closed' and self.pipelinedRequest is None and
  760.                     not self.willClose and PIPELINING_ENABLED))
  761.  
  762.     def sendRequest(self, callback, errback, host, port,
  763.                     requestStartCallback=None, headerCallback=None,
  764.                     bodyDataCallback = None, method="GET", path='/',
  765.                     headers=None, postVariables = None, postFiles = None):
  766.         """Sending an HTTP Request.  callback will be called if the request
  767.         completes normally, errback will be called if there is a network
  768.         error.
  769.  
  770.         Callback will be passed a dictionary that represents the HTTP
  771.         response,  it will have an entry for each header sent by the server as
  772.         well as as the following keys:
  773.             body, version, status, reason, method, path, host, port
  774.         They should be self explanatory, status and port will be integers, the
  775.         other items will be strings.
  776.  
  777.         If requestStartCallback is given, it will be called just before the
  778.         we start receiving data for the request (this can be a while after
  779.         sending the request in the case of pipelined requests).  It will be
  780.         passed this connection object.
  781.  
  782.         If headerCallback is given, it will be called when the headers are
  783.         read in.  It will be passed a response object whose body is set to
  784.         None.
  785.  
  786.         If bodyDataCallback is given it will be called as we read in the data
  787.         for the body.  Also, the connection won't store the body in memory,
  788.         and the callback is called, it will be passed None for the body.
  789.  
  790.         postVariables is a dictionary of variable names to values
  791.  
  792.         postFiles is a dictionary of variable names to dictionaries
  793.         containing filename, mimetype, and handle attributes. Handle
  794.         should be an already open file handle.
  795.         """
  796.  
  797.         if not self.canSendRequest():
  798.             raise NetworkError(_("Unknown"), 
  799.                     _("Internal Error: Not ready to send"))
  800.  
  801.         if headers is None:
  802.             headers = {}
  803.         else:
  804.             headers = headers.copy()
  805.         headers['Host'] = host.encode('idna')
  806.         if port != defaultPort(self.scheme):
  807.             headers['Host'] += ':%d' % port
  808.         headers['Accept-Encoding'] = 'identity'
  809.  
  810.         if (method == "POST" and postVariables is not None and
  811.                             len(postVariables) > 0 and postFiles is None):
  812.             postData = URLEncodeDict(postVariables)
  813.             headers['Content-Type'] = 'application/x-www-form-urlencoded'
  814.             headers['Content-Length'] = '%d' % len(postData)
  815.         elif method == "POST" and postFiles is not None:
  816.             (postData, boundary) = multipartEncode(postVariables, postFiles)
  817.             headers['Content-Type'] = 'multipart/form-data; boundary=%s' % boundary
  818.             headers['Content-Length'] = '%d' % len(postData)
  819.         else:
  820.             postData = None
  821.  
  822.         self.sendRequestData(method, path, headers, postData)
  823.         args = (callback, errback, requestStartCallback, headerCallback,
  824.                 bodyDataCallback, method, path, headers)
  825.         if self.state == 'ready':
  826.             self.startNewRequest(*args)
  827.         else:
  828.             self.pipelinedRequest = args
  829.  
  830.     def startNewRequest(self, callback, errback, requestStartCallback,
  831.             headerCallback, bodyDataCallback, method, path, headers):
  832.         """Called when we're ready to start processing a new request, either
  833.         because one has just been made, or because we've pipelined one, and
  834.         the previous request is done.
  835.         """
  836.  
  837.         if requestStartCallback:
  838.             trapCall(self, requestStartCallback, self)
  839.             if self.state == 'closed':
  840.                 return
  841.  
  842.         self.callback = callback
  843.         self.errback = errback
  844.         self.headerCallback = headerCallback
  845.         self.bodyDataCallback = bodyDataCallback
  846.         self.method = method
  847.         self.path = path
  848.         self.requestHeaders = headers
  849.         self.headers = {}
  850.         self.contentLength = self.version = self.status = self.reason = None
  851.         self.bytesRead = 0
  852.         self.body = ''
  853.         self.willClose = True 
  854.         # Assume we will close, until we get the headers
  855.         self.chunked = False
  856.         self.chunks = []
  857.         self.idleSince = None
  858.         self.sentReadyCallback = False
  859.         self.changeState('response-status')
  860.  
  861.     def sendRequestData(self, method, path, headers, data = None):
  862.         sendOut = []
  863.         path = path.encode("ascii", "replace")
  864.         path = urllib.quote(path, safe="-_.!~*'();/?:@&=+$,%#")
  865.         sendOut.append('%s %s HTTP/1.1\r\n' % (method, path))
  866.         for header, value in headers.items():
  867.             sendOut.append('%s: %s\r\n' % (header, value))
  868.         sendOut.append('\r\n')
  869.         if data is not None:
  870.             sendOut.append(data)
  871.         self.sendData(''.join(sendOut))
  872.  
  873.     def onStatusData(self):
  874.         line = self.buffer.readline()
  875.         if line is not None:
  876.             self.handleStatusLine(line)
  877.             if self.state == 'closed':
  878.                 return
  879.             if self.shortVersion != 9:
  880.                 self.changeState('response-headers')
  881.             else:
  882.                 self.startBody()
  883.  
  884.     def onHeaderData(self):
  885.         while self.state == 'response-headers':
  886.             line = self.buffer.readline()
  887.             if line is None:
  888.                 break
  889.             self.handleHeaderLine(line)
  890.         
  891.     def onBodyData(self):
  892.         if self.bodyDataCallback:
  893.             if self.contentLength is None:
  894.                 data = self.buffer.read()
  895.             else:
  896.                 bytesLeft = self.contentLength - self.bodyBytesRead
  897.                 data = self.buffer.read(bytesLeft)
  898.             if data == '':
  899.                 return
  900.             self.bodyBytesRead += len(data)
  901.             trapCall(self, self.bodyDataCallback, data)
  902.             if self.state == 'closed':
  903.                 return 
  904.             if (self.contentLength is not None and 
  905.                     self.bodyBytesRead == self.contentLength):
  906.                 self.finishRequest()
  907.         elif (self.contentLength is not None and 
  908.                 self.buffer.length >= self.contentLength):
  909.             self.body = self.buffer.read(self.contentLength)
  910.             self.finishRequest()
  911.  
  912.     def onChunkSizeData(self):
  913.         line = self.buffer.readline()
  914.         if line is not None:
  915.             sizeString = line.split(';', 1)[0] # ignore chunk-extensions
  916.             try:
  917.                 self.chunkSize = int(sizeString, 16)
  918.             except ValueError:
  919.                 self.handleError(BadChunkSize(line))
  920.                 return
  921.             if self.chunkSize != 0:
  922.                 self.chunkBytesRead = 0
  923.                 self.changeState('chunk-data')
  924.             else:
  925.                 self.changeState('chunk-trailer')
  926.  
  927.     def onChunkData(self):
  928.         if self.bodyDataCallback:
  929.             bytesLeft = self.chunkSize - self.chunkBytesRead
  930.             data = self.buffer.read(bytesLeft)
  931.             self.chunkBytesRead += len(data)
  932.             if data == '':
  933.                 return
  934.             trapCall(self, self.bodyDataCallback, data)
  935.             if self.chunkBytesRead == self.chunkSize:
  936.                 self.changeState('chunk-crlf')
  937.         elif self.buffer.length >= self.chunkSize:
  938.             self.chunks.append(self.buffer.read(self.chunkSize))
  939.             self.changeState('chunk-crlf')
  940.  
  941.     def onChunkCRLFData(self):
  942.         if self.buffer.length >= 2:
  943.             crlf = self.buffer.read(2)
  944.             if crlf != "\r\n":
  945.                 self.handleError(CRLFExpected(crlf))
  946.             else:
  947.                 self.changeState('chunk-size')
  948.  
  949.     def onChunkTrailerData(self):
  950.         # discard all trailers, we shouldn't have any
  951.         line = self.buffer.readline()
  952.         while line is not None:
  953.             if line == '':
  954.                 self.finishRequest()
  955.                 break
  956.             line = self.buffer.readline()
  957.  
  958.     def handleStatusLine(self, line):
  959.         try:
  960.             (version, status, reason) = line.split(None, 2)
  961.         except ValueError:
  962.             try:
  963.                 (version, status) = line.split(None, 1)
  964.                 reason = ""
  965.             except ValueError:
  966.                 # empty version will cause next test to fail and status
  967.                 # will be treated as 0.9 response.
  968.                 version = ""
  969.         if not version.startswith('HTTP/'):
  970.             # assume it's a Simple-Response from an 0.9 server
  971.             self.buffer.unread(line + '\r\n')
  972.             self.version = "HTTP/0.9"
  973.             self.status = 200
  974.             self.reason = ""
  975.             self.shortVersion = 9
  976.         else:
  977.             try:
  978.                 status = int(status)
  979.                 if status < 100 or status > 599:
  980.                     self.handleError(BadStatusLine(line))
  981.                     return
  982.             except ValueError:
  983.                 self.handleError(BadStatusLine(line))
  984.                 return
  985.             if version == 'HTTP/1.0':
  986.                 self.shortVersion = 10
  987.             elif version.startswith('HTTP/1.'):
  988.                 # use HTTP/1.1 code for HTTP/1.x where x>=1
  989.                 self.shortVersion = 11
  990.             else:
  991.                 self.handleError(BadStatusLine(line))
  992.                 return
  993.             self.version = version
  994.             self.status = status
  995.             self.reason = reason
  996.  
  997.     def handleHeaderLine(self, line):
  998.         if self.unparsedHeaderLine == '':
  999.             if line == '':
  1000.                 if self.status != 100:
  1001.                     self.startBody()
  1002.                 else:
  1003.                     self.changeState('response-status')
  1004.             elif ':' in line:
  1005.                 self.parseHeader(line)
  1006.             else:
  1007.                 self.unparsedHeaderLine = line
  1008.         else:
  1009.             # our last line may have been a continued header, or it may be
  1010.             # garbage, 
  1011.             if len(line) > 0 and line[0] in (' ', '\t'):
  1012.                 self.unparsedHeaderLine += line.lstrip()
  1013.                 if ':' in self.unparsedHeaderLine:
  1014.                     self.parseHeader(self.unparsedHeaderLine)
  1015.                     self.unparsedHeaderLine = ''
  1016.             else:
  1017.                 msg = "line: %s, next line: %s" % (self.unparsedHeaderLine, 
  1018.                         line)
  1019.                 self.handleError(BadHeaderLine(msg))
  1020.  
  1021.     def parseHeader(self, line):
  1022.         header, value = line.split(":", 1)
  1023.         value = value.strip()
  1024.         header = header.lstrip().lower()
  1025.         if value == '':
  1026.             print "DTV: Warning: Bad Header from %s://%s:%s%s (%s)" % (self.scheme, self.host, self.port, self.path, line)
  1027.         if header not in self.headers:
  1028.             self.headers[header] = value
  1029.         else:
  1030.             self.headers[header] += (',%s' % value)
  1031.  
  1032.     def startBody(self):
  1033.         self.findExpectedLength()
  1034.         self.checkChunked()
  1035.         self.decideWillClose()
  1036.         if self.headerCallback:
  1037.             trapCall(self, self.headerCallback, self.makeResponse())
  1038.         if self.state == 'closed':
  1039.             return # maybe the header callback cancelled this request
  1040.         if ((100 <= self.status <= 199) or self.status in (204, 304) or
  1041.                 self.method == 'HEAD' or self.contentLength == 0):
  1042.             self.finishRequest()
  1043.         else:
  1044.             if self.bodyDataCallback:
  1045.                 self.bodyBytesRead = 0
  1046.             if not self.chunked:
  1047.                 self.changeState('response-body')
  1048.             else:
  1049.                 self.changeState('chunk-size')
  1050.         self.maybeSendReadyCallback()
  1051.  
  1052.     def checkChunked(self):
  1053.         te = self.headers.get('transfer-encoding', '')
  1054.         self.chunked = (te.lower() == 'chunked')
  1055.  
  1056.     def findExpectedLength(self):
  1057.         self.contentLength = None
  1058.         if self.status == 416:
  1059.             try:
  1060.                 contentRange = self.headers['content-range']
  1061.             except KeyError:
  1062.                 pass
  1063.             else:
  1064.                 m = re.search('bytes\s+\*/(\d+)', contentRange)
  1065.                 if m is not None:
  1066.                     try:
  1067.                         self.contentLength = int(m.group(1))
  1068.                     except (ValueError, TypeError):
  1069.                         pass
  1070.         if (self.contentLength is None and 
  1071.                 self.headers.get('transfer-encoding') in ('identity', None)):
  1072.             try:
  1073.                 self.contentLength = int(self.headers['content-length'])
  1074.             except (ValueError, KeyError):
  1075.                 pass
  1076.         if self.contentLength < 0:
  1077.             self.contentLength = None
  1078.  
  1079.     def decideWillClose(self):
  1080.         if self.shortVersion != 11:
  1081.             # Close all connections to HTTP/1.0 servers.
  1082.             self.willClose = True
  1083.         elif 'close' in self.headers.get('connection', '').lower():
  1084.             self.willClose = True
  1085.         elif not self.chunked and self.contentLength is None:
  1086.             # if we aren't chunked and didn't get a content length, we have to
  1087.             # assume the connection will close
  1088.             self.willClose = True
  1089.         else:
  1090.             # HTTP/1.1 connections are assumed to stay open 
  1091.             self.willClose = False
  1092.  
  1093.     def finishRequest(self):
  1094.         # calculate the response and and remember our callback.  They may
  1095.         # change after we start a pielined response.
  1096.         origCallback = self.callback 
  1097.         if self.bodyDataCallback:
  1098.             body = None
  1099.         elif self.chunked:
  1100.             body = ''.join(self.chunks)
  1101.         else:
  1102.             body = self.body
  1103.         response = self.makeResponse(body)
  1104.         if self.stream.isOpen():
  1105.             if self.willClose:
  1106.                 self.closeConnection()
  1107.                 self.changeState('closed')
  1108.             elif self.pipelinedRequest is not None:
  1109.                 req = self.pipelinedRequest
  1110.                 self.pipelinedRequest = None
  1111.                 self.startNewRequest(*req)
  1112.             else:
  1113.                 self.changeState('ready')
  1114.                 self.idleSince = clock()
  1115.         trapCall(self, origCallback, response)
  1116.         self.requestsFinished += 1
  1117.         self.maybeSendReadyCallback()
  1118.  
  1119.     def makeResponse(self, body=None):
  1120.         response = self.headers.copy()
  1121.         response['body'] = body
  1122.         for key in ('version', 'status', 'reason', 'method', 'path', 'host',
  1123.                 'port', 'contentLength'):
  1124.             response[key] = getattr(self, key)
  1125.         return response
  1126.  
  1127.     def maybeSendReadyCallback(self):
  1128.         if (self.readyCallback and self.canSendRequest() and not
  1129.                 self.sentReadyCallback):
  1130.             self.sentReadyCallback = True
  1131.  
  1132.             # This needs to be in an idle so that the connection is added
  1133.             # to the "active" list before the open callback happens --NN
  1134.             eventloop.addIdle(lambda : self.readyCallback(self),
  1135.                               "Ready Callback %s" % str(self))
  1136.         
  1137.     def handleClose(self, type):
  1138.         oldState = self.state
  1139.         self.closeConnection()
  1140.         if oldState == 'response-body' and self.contentLength is None:
  1141.             self.body = self.buffer.read()
  1142.             self.finishRequest()
  1143.         elif self.stream.timedOut:
  1144.             self.errback(ConnectionTimeout(self.host))
  1145.         else:
  1146.             self.errback(ServerClosedConnection(self.host))
  1147.         self.checkPipelineNotStarted()
  1148.  
    def handleError(self, error):
        """Abort the current request: close the connection, then report
        error to the request's errback (through trapCall).
        """
        # The connection is closed before the errback gets to run.
        self.closeConnection()
        trapCall(self, self.errback, error)
  1152.  
  1153. class HTTPSConnection(HTTPConnection):
  1154.     streamFactory = AsyncSSLStream
  1155.     scheme = 'https'
  1156.  
  1157. class ProxyHTTPSConnection(HTTPConnection):
  1158.     streamFactory = ProxiedAsyncSSLStream
  1159.     scheme = 'https'
  1160.  
  1161. class HTTPConnectionPool(object):
    """Handle a pool of HTTP connections.

    We use the following strategy to handle new requests:
    * If there is a connection to the server that's ready to send, use that.
    * If we haven't hit our connection limits, create a new connection.
    * When a connection becomes closed or ready, we try to run the requests
      that are still pending.

    NOTE: "server" in this class means the combination of the scheme, hostname
    and port.
    """
  1172.  
  1173.     HTTP_CONN = HTTPConnection
  1174.     HTTPS_CONN = HTTPSConnection
  1175.     PROXY_HTTPS_CONN = ProxyHTTPSConnection
  1176.     MAX_CONNECTIONS_PER_SERVER = 2 
  1177.     CONNECTION_TIMEOUT = 300
  1178.     MAX_CONNECTIONS = 30
  1179.  
  1180.     def __init__(self):
  1181.         self.pendingRequests = []
  1182.         self.activeConnectionCount = 0
  1183.         self.freeConnectionCount = 0
  1184.         self.connections = {}
  1185.         eventloop.addTimeout(60, self.cleanupPool, 
  1186.             "Check HTTP Connection Timeouts")
  1187.  
  1188.     def _getServerConnections(self, scheme, host, port):
  1189.         key = '%s:%s:%s' % (scheme, host, port)
  1190.         try:
  1191.             return self.connections[key]
  1192.         except KeyError:
  1193.             self.connections[key] = {'free': set(), 'active': set()}
  1194.             return self.connections[key]
  1195.  
  1196.     def _popPendingRequest(self):
  1197.         """Try to choose a pending request to process.  If one is found,
  1198.         remove it from the pendingRequests list and return it.  If not, return
  1199.         None.
  1200.         """
  1201.  
  1202.         if self.activeConnectionCount >= self.MAX_CONNECTIONS:
  1203.             return None
  1204.         for i in xrange(len(self.pendingRequests)):
  1205.             req = self.pendingRequests[i]
  1206.             if req['proxy_host']:
  1207.                 conns = self._getServerConnections(req['scheme'],
  1208.                                                    req['proxy_host'], 
  1209.                                                    req['proxy_port'])
  1210.             else:
  1211.                 conns = self._getServerConnections(req['scheme'], req['host'], 
  1212.                                                    req['port'])
  1213.  
  1214.             if (len(conns['free']) > 0 or 
  1215.                     len(conns['active']) < self.MAX_CONNECTIONS_PER_SERVER):
  1216.                 # This doesn't mess up the xrange above since we return immediately.
  1217.                 del self.pendingRequests[i]
  1218.                 return req
  1219.         return None
  1220.  
  1221.     def _onConnectionClosed(self, conn):
  1222.         conns = self._getServerConnections(conn.scheme, conn.host, conn.port)
  1223.         if conn in conns['active']:
  1224.             conns['active'].remove(conn)
  1225.             self.activeConnectionCount -= 1
  1226.         elif conn in conns['free']:
  1227.             conns['free'].remove(conn)
  1228.             self.freeConnectionCount -= 1
  1229.         else:
  1230.             logging.warn("_onConnectionClosed called with connection not "
  1231.                     "in either queue")
  1232.         self.runPendingRequests()
  1233.  
  1234.     def _onConnectionReady(self, conn):
  1235.         conns = self._getServerConnections(conn.scheme, conn.host, conn.port)
  1236.         if conn in conns['active']:
  1237.             conns['active'].remove(conn)
  1238.             self.activeConnectionCount -= 1
  1239.         else:
  1240.             logging.warn("_onConnectionReady called with connection not "
  1241.                     "in the active queue")
  1242.         if conn not in conns['free']:
  1243.             conns['free'].add(conn)
  1244.             self.freeConnectionCount += 1
  1245.         else:
  1246.             logging.warn("_onConnectionReady called with connection already "
  1247.                     "in the free queue")
  1248.         self.runPendingRequests()
  1249.  
  1250.     def addRequest(self, callback, errback, requestStartCallback,
  1251.             headerCallback, bodyDataCallback, url, method, headers,
  1252.             postVariables = None, postFiles = None):
  1253.         """Add a request to be run.  The request will run immediately if we
  1254.         have a free connection, otherwise it will be queued.
  1255.  
  1256.         returns a request id that can be passed to cancelRequest
  1257.         """
  1258.         proxy_host = proxy_port = None
  1259.         scheme, host, port, path = parseURL(url)
  1260.         if scheme not in ['http', 'https'] or host == '' or path == '':
  1261.             errback (MalformedURL(url))
  1262.             return
  1263.         # using proxy
  1264.         # NOTE: The code for HTTPS over a proxy is in _makeNewConnection()
  1265.         if scheme == 'http' and config.get(prefs.HTTP_PROXY_ACTIVE):
  1266.             if config.get(prefs.HTTP_PROXY_HOST) and \
  1267.                    config.get(prefs.HTTP_PROXY_PORT):
  1268.                 proxy_host = config.get(prefs.HTTP_PROXY_HOST)
  1269.                 proxy_port = config.get(prefs.HTTP_PROXY_PORT)
  1270.                 path = url
  1271.                 scheme = config.get(prefs.HTTP_PROXY_SCHEME)
  1272.                 if config.get(prefs.HTTP_PROXY_AUTHORIZATION_ACTIVE):
  1273.                     username = config.get(prefs.HTTP_PROXY_AUTHORIZATION_USERNAME)
  1274.                     password = config.get(prefs.HTTP_PROXY_AUTHORIZATION_PASSWORD)
  1275.                     authString = username+':'+password
  1276.                     authString = b64encode(authString)
  1277.                     headers['ProxyAuthorization'] = "Basic " + authString
  1278.         req = {
  1279.             'callback' : callback,
  1280.             'errback': errback,
  1281.             'requestStartCallback': requestStartCallback,
  1282.             'headerCallback': headerCallback,
  1283.             'bodyDataCallback': bodyDataCallback,
  1284.             'scheme': scheme,
  1285.             'host': host,
  1286.             'port': port,
  1287.             'method': method,
  1288.             'path': path,
  1289.             'headers': headers,
  1290.             'postVariables': postVariables,
  1291.             'postFiles': postFiles,
  1292.             'proxy_host': proxy_host,
  1293.             'proxy_port': proxy_port,
  1294.         }
  1295.         self.pendingRequests.append(req)
  1296.         self.runPendingRequests()
  1297.  
  1298.     def runPendingRequests(self):
  1299.         """Find pending requests have a free connection, otherwise it will be
  1300.         queued.
  1301.         """
  1302.  
  1303.         while True:
  1304.             req = self._popPendingRequest()
  1305.             if req is None:
  1306.                 return
  1307.             if req['proxy_host']:
  1308.                 conns = self._getServerConnections(req['scheme'],
  1309.                                                    req['proxy_host'], 
  1310.                                                    req['proxy_port'])
  1311.             else:
  1312.                 conns = self._getServerConnections(req['scheme'], req['host'], 
  1313.                                                    req['port'])
  1314.             if len(conns['free']) > 0:
  1315.                 conn = conns['free'].pop()
  1316.                 self.freeConnectionCount -= 1
  1317.                 conn.sendRequest(req['callback'], req['errback'],
  1318.                         req['host'], req['port'],
  1319.                         req['requestStartCallback'], req['headerCallback'],
  1320.                         req['bodyDataCallback'], req['method'], req['path'],
  1321.                         req['headers'], req['postVariables'], req['postFiles'])
  1322.             else:
  1323.                 conn = self._makeNewConnection(req)
  1324.             conns['active'].add(conn)
  1325.             self.activeConnectionCount += 1
  1326.             connectionCount = (self.activeConnectionCount +
  1327.                                self.freeConnectionCount)
  1328.             if connectionCount > self.MAX_CONNECTIONS:
  1329.                 self._dropAFreeConnection()
  1330.  
  1331.     def _makeNewConnection(self, req):
  1332.         disableReadTimeout = req['postFiles'] is not None
  1333.         def openConnectionCallback(conn):
  1334.             conn.sendRequest(req['callback'], req['errback'],
  1335.                              req['host'], req['port'],
  1336.                     req['requestStartCallback'], req['headerCallback'],
  1337.                     req['bodyDataCallback'], req['method'], req['path'],
  1338.                     req['headers'], req['postVariables'], req['postFiles'])
  1339.         def openConnectionErrback(error):
  1340.             if req['proxy_host']:
  1341.                 conns = self._getServerConnections(req['scheme'],
  1342.                                                    req['proxy_host'], 
  1343.                                                    req['proxy_port'])
  1344.             else:
  1345.                 conns = self._getServerConnections(req['scheme'], req['host'], 
  1346.                                                    req['port'])
  1347.             if conn in conns['active']:
  1348.                 conns['active'].remove(conn)
  1349.                 self.activeConnectionCount -= 1
  1350.             req['errback'](error)
  1351.  
  1352.         # using proxy
  1353.         #
  1354.         # NOTE: The code for HTTP over a proxy is in addRequest()
  1355.         if req['scheme'] == 'https' and config.get(prefs.HTTP_PROXY_ACTIVE):
  1356.             connect_scheme = 'proxied_https'
  1357.         else:
  1358.             connect_scheme = req['scheme']
  1359.  
  1360.         if connect_scheme == 'http':
  1361.             conn = self.HTTP_CONN(self._onConnectionClosed,
  1362.                     self._onConnectionReady)
  1363.         elif connect_scheme == 'https':
  1364.             conn = self.HTTPS_CONN(self._onConnectionClosed,
  1365.                     self._onConnectionReady)
  1366.         elif connect_scheme == 'proxied_https':
  1367.             conn = self.PROXY_HTTPS_CONN(self._onConnectionClosed,
  1368.                     self._onConnectionReady)
  1369.         else:
  1370.             raise AssertionError ("Code shouldn't reach here. (connect scheme %s)" % connect_scheme)
  1371.  
  1372.         # This needs to be in an idle so that the connection is added
  1373.         # to the "active" list before the open callback happens --NN
  1374.         if req['proxy_host']:
  1375.             eventloop.addIdle(lambda : conn.openConnection(req['proxy_host'],
  1376.                                                            req['proxy_port'],
  1377.                          openConnectionCallback, openConnectionErrback,
  1378.                          disableReadTimeout),
  1379.                           "Open connection %s" % str(self))
  1380.         else:
  1381.             eventloop.addIdle(lambda : conn.openConnection(req['host'],
  1382.                                                            req['port'],
  1383.                          openConnectionCallback, openConnectionErrback,
  1384.                          disableReadTimeout),
  1385.                           "Open connection %s" % str(self))
  1386.         return conn
  1387.  
  1388.     def _dropAFreeConnection(self):
  1389.         # TODO: pick based on LRU
  1390.         firstTime = sys.maxint
  1391.         toDrop = None
  1392.  
  1393.         for conns in self.connections.values():
  1394.             for candidate in conns['free']:
  1395.                 if candidate.idleSince < firstTime:
  1396.                     toDrop = candidate
  1397.         if toDrop is not None:
  1398.             toDrop.closeConnection()
  1399.  
  1400.     def cleanupPool(self):
  1401.         for serverKey in self.connections.keys():
  1402.             conns = self.connections[serverKey]
  1403.             toRemove = []
  1404.             for conn in conns['free']:
  1405.                 if (conn.idleSince is not None and 
  1406.                         conn.idleSince + self.CONNECTION_TIMEOUT <= clock()):
  1407.                     toRemove.append(conn)
  1408.             for conn in toRemove:
  1409.                 conn.closeConnection()
  1410.             if len(conns['free']) == len(conns['active']) == 0:
  1411.                 del self.connections[serverKey]
  1412.         eventloop.addTimeout(60, self.cleanupPool, 
  1413.             "HTTP Connection Pool Cleanup")
  1414.  
  1415. class HTTPClient(object):
  1416.     """High-level HTTP client object.  
  1417.     
  1418.     HTTPClients handle a single HTTP request, but may use several
  1419.     HTTPConnections if the server returns back with a redirection status code,
  1420.     asks for authorization, etc.  Connections are pooled using an
  1421.     HTTPConnectionPool object.
  1422.     """
  1423.  
  1424.     connectionPool = HTTPConnectionPool() # class-wid connection pool
  1425.     MAX_REDIRECTS = 10
  1426.     MAX_AUTH_ATTEMPS = 5
  1427.  
  1428.     def __init__(self, url, callback, errback, headerCallback=None,
  1429.             bodyDataCallback=None, method="GET", start=0, etag=None,
  1430.             modified=None, cookies=None, postVariables = None, postFiles = None):
  1431.         if cookies == None:
  1432.             cookies = {}
  1433.         
  1434.         self.url = url
  1435.         self.callback = callback
  1436.         self.errback = errback
  1437.         self.headerCallback = headerCallback
  1438.         self.bodyDataCallback = bodyDataCallback
  1439.         self.method = method
  1440.         self.start = start
  1441.         self.etag = etag
  1442.         self.modified = modified
  1443.         self.cookies = cookies # A dictionary of cookie names to
  1444.                                # dictionaries containing the keys
  1445.                                # 'Value', 'Version', 'received',
  1446.                                # 'Path', 'Domain', 'Port', 'Max-Age',
  1447.                                # 'Discard', 'Secure', and optionally
  1448.                                # one or more of the following:
  1449.                                # 'Comment', 'CommentURL', 'origPath',
  1450.                                # 'origDomain', 'origPort'
  1451.         self.postVariables = postVariables
  1452.         self.postFiles = postFiles
  1453.         self.depth = 0
  1454.         self.authAttempts = 0
  1455.         self.updateURLOk = True
  1456.         self.originalURL = self.updatedURL = self.redirectedURL = url
  1457.         self.userAgent = "%s/%s (%s)" % \
  1458.                          (config.get(prefs.SHORT_APP_NAME),
  1459.                           config.get(prefs.APP_VERSION),
  1460.                           config.get(prefs.PROJECT_URL))
  1461.         self.connection = None
  1462.         self.cancelled = False
  1463.         self.initHeaders()
  1464.  
  1465.     def __str__(self):
  1466.         return "%s: %s" % (type(self).__name__, self.url)
  1467.  
  1468.     def cancel(self):
  1469.         self.cancelled = True
  1470.         if self.connection is not None:
  1471.             self.connection.closeConnection()
  1472.             self.connection = None
  1473.  
  1474.     def isValidCookie(self, cookie, scheme, host, port, path):
  1475.         return ((time.time() - cookie['received'] < cookie['Max-Age']) and
  1476.                 (cookie['Version'] == '1') and
  1477.                 self.hostMatches(host, cookie['Domain']) and
  1478.                 path.startswith(cookie['Path']) and
  1479.                 self.portMatches(str(port), cookie['Port']) and
  1480.                 (scheme == 'https' or not cookie['Secure']))
  1481.  
  1482.     def dropStaleCookies(self):
  1483.         """Remove cookies that have expired or are invalid for this URL"""
  1484.         scheme, host, port, path = parseURL(self.url)
  1485.         temp = {}
  1486.         for name in self.cookies:
  1487.             if self.isValidCookie(self.cookies[name], scheme, host, port, path):
  1488.                 temp[name] = self.cookies[name]
  1489.         self.cookies = temp
  1490.  
  1491.     def hostMatches(self, host, host2):
  1492.         host = host.lower()
  1493.         host2 = host2.lower()
  1494.         if host.find('.') == -1:
  1495.             host = host+'.local'
  1496.         if host2.find('.') == -1:
  1497.             host2 = host2+'.local'
  1498.         if host2.startswith('.'):
  1499.             return host.endswith(host2)
  1500.         else:
  1501.             return host == host2
  1502.  
  1503.     def portMatches(self, port, portlist):
  1504.         if portlist is None:
  1505.             return True
  1506.         portlist = portlist.replace(',',' ').split()
  1507.         return port in portlist
  1508.  
  1509.     def initHeaders(self):
  1510.         self.headers = {}
  1511.         if self.start > 0:
  1512.             self.headers["Range"] = "bytes="+str(self.start)+"-"
  1513.         if not self.etag is None:
  1514.             self.headers["If-None-Match"] = self.etag
  1515.         if not self.modified is None:
  1516.             self.headers["If-Modified-Since"] = self.modified
  1517.         self.headers['User-Agent'] = self.userAgent
  1518.         self.setCookieHeader()
  1519.  
  1520.     def setCookieHeader(self):
  1521.         self.dropStaleCookies()
  1522.         if len(self.cookies) > 0:
  1523.             header = "$Version=1"
  1524.             for name in self.cookies:
  1525.                 header = '%s;%s=%s' % (header, name, self.cookies[name]['Value'])
  1526.                 if self.cookies[name].has_key('origPath'):
  1527.                     header = '%s;$Path=%s' % \
  1528.                                        (header, self.cookies[name]['origPath'])
  1529.                 if self.cookies[name].has_key('origDomain'):
  1530.                     header = '%s;$Domain=%s' % \
  1531.                                        (header, self.cookies[name]['origDomain'])
  1532.                 if self.cookies[name].has_key('origPort'):
  1533.                     header = '%s;$Port=%s' % \
  1534.                                        (header, self.cookies[name]['origPort'])
  1535.             self.headers['Cookie'] = header
  1536.  
  1537.     def startRequest(self):
  1538.         self.cancelled = False
  1539.         self.connection = None
  1540.         self.willHandleResponse = False
  1541.         self.gotBadStatusCode = False
  1542.         if 'Authorization' not in self.headers:
  1543.             scheme, host, port, path = parseURL(self.redirectedURL)
  1544.             def callback(authHeader):
  1545.                 if self.cancelled:
  1546.                     return
  1547.                 if authHeader is not None:
  1548.                     self.headers["Authorization"] = authHeader
  1549.                 self.reallyStartRequest()
  1550.             httpauth.findHTTPAuth(callback, host.decode('ascii','replace'), path.decode('ascii','replace'))
  1551.         else:
  1552.             self.reallyStartRequest()
  1553.  
  1554.     def reallyStartRequest(self):
  1555.         if self.bodyDataCallback is not None:
  1556.             bodyDataCallback = self.onBodyData
  1557.         else:
  1558.             bodyDataCallback = None
  1559.         self.connectionPool.addRequest(self.callbackIntercept,
  1560.                 self.errbackIntercept, self.onRequestStart, self.onHeaders,
  1561.                 bodyDataCallback,
  1562.                 self.url, self.method, self.headers, self.postVariables,
  1563.                 self.postFiles)
  1564.  
  1565.     def statusCodeExpected(self, status):
  1566.         expectedStatusCodes = set([200])
  1567.         if self.start != 0:
  1568.             expectedStatusCodes.add(206)
  1569.         if self.etag is not None or self.modified is not None:
  1570.             expectedStatusCodes.add(304)
  1571.         return status in expectedStatusCodes
  1572.  
  1573.     def callbackIntercept(self, response):
  1574.         if self.cancelled:
  1575.             print "WARNING: Callback on a cancelled request for %s" % self.url
  1576.             traceback.print_stack()
  1577.             return
  1578.         if self.shouldRedirect(response):
  1579.             self.handleRedirect(response)
  1580.         elif self.shouldAuthorize(response):
  1581.             # FIXME: We reuse the id here, but if the request is
  1582.             # cancelled while the auth dialog is up, it won't actually
  1583.             # get cancelled.
  1584.             self.handleAuthorize(response)
  1585.         else:
  1586.             self.connection = None
  1587.             if not self.gotBadStatusCode:
  1588.                 if self.callback:
  1589.                     response = self.prepareResponse(response)
  1590.                     trapCall(self, self.callback, response)
  1591.             elif self.errback:
  1592.                 error = UnexpectedStatusCode(response['status'])
  1593.                 self.errbackIntercept(error)
  1594.  
  1595.     def errbackIntercept(self, error):
  1596.         if self.cancelled:
  1597.             return
  1598.         elif isinstance(error, PipelinedRequestNeverStarted):
  1599.             # Connection closed before our pipelined request started.  RFC
  1600.             # 2616 says we should retry
  1601.             self.startRequest() 
  1602.             # this should give us a new connection, since our last one closed
  1603.         elif (isinstance(error, ServerClosedConnection) and
  1604.                 self.connection is not None and
  1605.                 self.connection.requestsFinished > 0 and 
  1606.                 self.connection.bytesRead == 0):
  1607.             # Connection closed when trying to reuse an http connection.  We
  1608.             # should retry with a fresh connection
  1609.             self.startRequest()
  1610.         else:
  1611.             self.connection = None
  1612.             trapCall(self, self.errback, error)
  1613.  
  1614.     def onRequestStart(self, connection):
  1615.         if self.cancelled:
  1616.             connection.closeConnection()
  1617.         else:
  1618.             self.connection = connection
  1619.  
  1620.     def onHeaders(self, response):
  1621.         if self.shouldRedirect(response) or self.shouldAuthorize(response):
  1622.             self.willHandleResponse = True
  1623.         else:
  1624.             if not self.statusCodeExpected(response['status']):
  1625.                 self.gotBadStatusCode = True
  1626.             if self.headerCallback is not None:
  1627.                 response = self.prepareResponse(response)
  1628.                 if not trapCall(self, self.headerCallback, response):
  1629.                     self.cancel()
  1630.  
  1631.     def onBodyData(self, data):
  1632.         if (not self.willHandleResponse and not self.gotBadStatusCode and 
  1633.                 self.bodyDataCallback):
  1634.             if not trapCall(self, self.bodyDataCallback, data):
  1635.                 self.cancel()
  1636.  
  1637.     def prepareResponse(self, response):
  1638.         response['original-url'] = self.originalURL
  1639.         response['updated-url'] = self.updatedURL
  1640.         response['redirected-url'] = self.redirectedURL
  1641.         response['filename'] = self.getFilenameFromResponse(response)
  1642.         response['charset'] = self.getCharsetFromResponse(response)
  1643.         try:
  1644.             response['cookies'] = self.getCookiesFromResponse(response)
  1645.         except:
  1646.             print "ERROR in getCookiesFromResponse()"
  1647.             traceback.print_exc()
  1648.         return response
  1649.  
  1650.     def getCookiesFromResponse(self, response):
  1651.         """Generates a cookie dictionary from headers in response
  1652.         """
  1653.         def getAttrPair(attr):
  1654.             result = attr.strip().split('=', 1)
  1655.             if len(result) == 2:
  1656.                 (name, value) = result
  1657.             else:
  1658.                 name = result[0]
  1659.                 value = ''
  1660.             return (name, value)
  1661.         cookies = {}
  1662.         cookieStrings = []
  1663.         if response.has_key('set-cookie') or response.has_key('set-cookie2'):
  1664.             scheme, host, port, path = parseURL(self.redirectedURL)
  1665.  
  1666.             # Split header into cookie strings, respecting commas in
  1667.             # the middle of stuff
  1668.             if response.has_key('set-cookie'):
  1669.                 cookieStrings.extend(response['set-cookie'].split(','))
  1670.             if response.has_key('set-cookie2'):
  1671.                 cookieStrings.extend(response['set-cookie2'].split(','))
  1672.             temp = []
  1673.             for string in cookieStrings:
  1674.                 if (len(temp) > 0 and (
  1675.                     (temp[-1].count('"')%2 == 1) or
  1676.                     (string.find('=') == -1) or
  1677.                     (string.find('=') > string.find(';')))):
  1678.                     temp[-1] = '%s,%s' % (temp[-1], string)
  1679.                 else:
  1680.                     temp.append(string)
  1681.             cookieStrings = temp
  1682.             
  1683.             for string in cookieStrings:
  1684.                 # Strip whitespace from the cookie string and split
  1685.                 # into name-value pairs.
  1686.                 string = string.strip()
  1687.                 pairs = string.split(';')
  1688.                 temp = []
  1689.                 for pair in pairs:
  1690.                     if (len(temp) > 0 and
  1691.                         (temp[-1].count('"')%2 == 1)):
  1692.                         temp[-1] = '%s;%s' % (temp[-1], pair)
  1693.                     else:
  1694.                         temp.append(pair)
  1695.                 pairs = temp
  1696.  
  1697.                 (name, value) = getAttrPair(pairs.pop(0))
  1698.                 cookie = {'Value' : value,
  1699.                           'Version' : '1',
  1700.                           'received' : time.time(),
  1701.                           # Path is everything up until the last /
  1702.                           'Path' : '/'.join(path.split('/')[:-1])+'/',
  1703.                           'Domain' : host,
  1704.                           'Port' : str(port),
  1705.                           'Secure' : False}
  1706.                 for attr in pairs:
  1707.                     attr = attr.strip()
  1708.                     if attr.lower() == 'discard':
  1709.                         cookie['Discard'] = True
  1710.                     elif attr.lower() == 'secure':
  1711.                         cookie['Secure'] = True
  1712.                     elif attr.lower().startswith('version='):
  1713.                         cookie['Version'] = getAttrPair(attr)[1]
  1714.                     elif attr.lower().startswith('comment='):
  1715.                         cookie['Comment'] = getAttrPair(attr)[1]
  1716.                     elif attr.lower().startswith('commenturl='):
  1717.                         cookie['CommentURL'] = getAttrPair(attr)[1]
  1718.                     elif attr.lower().startswith('max-age='):
  1719.                         cookie['Max-Age'] = getAttrPair(attr)[1]
  1720.                     elif attr.lower().startswith('expires='):
  1721.                         now = time.time()
  1722.                         # FIXME: "expires" isn't very well defined and
  1723.                         # this code will probably puke in certain cases
  1724.                         cookieval = getAttrPair(attr)[1].strip()
  1725.                         expires = get_cookie_expiration_date(cookieval)
  1726.                         
  1727.                         expires -= time.timezone
  1728.                         if expires < now:
  1729.                             cookie['Max-Age'] = 0
  1730.                         else:
  1731.                             cookie['Max-Age'] = int(expires - now)
  1732.                     elif attr.lower().startswith('domain='):
  1733.                         cookie['origDomain'] = getAttrPair(attr)[1]
  1734.                         cookie['Domain'] = cookie['origDomain']
  1735.                     elif attr.lower().startswith('port='):
  1736.                         cookie['origPort'] = getAttrPair(attr)[1]
  1737.                         cookie['Port'] = cookie['origPort']
  1738.                     elif attr.lower().startswith('path='):
  1739.                         cookie['origPath'] = getAttrPair(attr)[1]
  1740.                         cookie['Path'] = cookie['origPath']
  1741.                 if not cookie.has_key('Discard'):
  1742.                     cookie['Discard'] = not cookie.has_key('Max-Age')
  1743.                 if not cookie.has_key('Max-Age'):
  1744.                     cookie['Max-Age'] = str(2**30)
  1745.                 if self.isValidCookie(cookie, scheme, host, port, path):
  1746.                     cookies[name] = cookie
  1747.         return cookies
  1748.  
  1749.     def findValueFromHeader(self, header, targetName):
  1750.         """Finds a value from a response header that uses key=value pairs with
  1751.         the ';' char as a separator.  This is how content-disposition and
  1752.         content-type work.
  1753.         """
  1754.         for part in header.split(';'):
  1755.             try:
  1756.                 name, value = part.split("=", 1)
  1757.             except ValueError:
  1758.                 pass
  1759.             else:
  1760.                 if name.strip().lower() == targetName.lower():
  1761.                     return value.strip().strip('"')
  1762.         return None
  1763.  
  1764.     def getFilenameFromResponse(self, response):
  1765.         try:
  1766.             disposition = response['content-disposition']
  1767.         except KeyError:
  1768.             pass
  1769.         else:
  1770.             filename = self.findValueFromHeader(disposition, 'filename')
  1771.             if filename is not None:
  1772.                 return cleanFilename(filename)
  1773.         return filenameFromURL(util.unicodify(response['redirected-url']), clean=True)
  1774.  
  1775.     def getCharsetFromResponse(self, response):
  1776.         try:
  1777.             contentType = response['content-type']
  1778.         except KeyError:
  1779.             pass
  1780.         else:
  1781.             charset = self.findValueFromHeader(contentType, 'charset')
  1782.             if charset is not None:
  1783.                 return charset
  1784.         return 'iso-8859-1'
  1785.  
  1786.     def shouldRedirect(self, response):
  1787.         return (response['status'] in (301, 302, 303, 307) and 
  1788.                 self.depth < self.MAX_REDIRECTS and 
  1789.                 'location' in response)
  1790.  
  1791.     def handleRedirect(self, response):
  1792.         self.depth += 1
  1793.         self.url = urljoin(self.url, response['location'])
  1794.         self.redirectedURL = self.url
  1795.         if response['status'] == 301 and self.updateURLOk:
  1796.             self.updatedURL = self.url
  1797.         else:
  1798.             self.updateURLOk = False
  1799.         if response['status'] == 303:
  1800.             # "See Other" we must do a get request for the result
  1801.             self.method = "GET"
  1802.             self.postVariables = None
  1803.         if 'Authorization' in self.headers:
  1804.             del self.headers["Authorization"]
  1805.         self.startRequest()
  1806.  
  1807.     def shouldAuthorize(self, response):
  1808.         return (response['status'] == 401 and 
  1809.                 self.authAttempts < self.MAX_AUTH_ATTEMPS and
  1810.                 'www-authenticate' in response)
  1811.  
  1812.     def handleAuthorize(self, response):
  1813.         match = re.search("(\w+)\s+realm\s*=\s*\"(.*?)\"$",
  1814.             response['www-authenticate'])
  1815.         if match is None:
  1816.             trapCall(self, self.errback, AuthorizationFailed())
  1817.             return
  1818.         authScheme = unicode(match.expand("\\1"))
  1819.         realm = unicode(match.expand("\\2"))
  1820.         if authScheme.lower() != 'basic':
  1821.             trapCall(self, self.errback, AuthorizationFailed())
  1822.             return
  1823.         def callback(authHeader):
  1824.             if authHeader is not None:
  1825.                 self.headers["Authorization"] = authHeader
  1826.                 self.authAttempts += 1
  1827.                 self.startRequest()
  1828.             else:
  1829.                 trapCall(self, self.errback, AuthorizationFailed())
  1830.         httpauth.askForHTTPAuth(callback, self.url, realm, authScheme)
  1831.  
  1832. # Grabs a URL in the background using the eventloop
  1833. # defaultMimeType is used for file:// URLs
  1834. def grabURL(url, callback, errback, headerCallback=None,
  1835.         bodyDataCallback=None, method="GET", start=0, etag=None,
  1836.         modified=None, cookies=None, postVariables = None, postFiles = None,
  1837.         defaultMimeType='application/octet-stream', clientClass = HTTPClient):
  1838.     if cookies == None:
  1839.         cookies = {}
  1840.     if url.startswith("file://"):
  1841.         path = getFileURLPath(url)
  1842.         try:
  1843.             f = file(path)
  1844.         except EnvironmentError:
  1845.             errback(FileURLNotFoundError(path))
  1846.         else:
  1847.             try:
  1848.                 data = f.read()
  1849.             except:
  1850.                 errback(FileURLReadError(path))
  1851.             else:
  1852.                 callback({"body": data,
  1853.                               "updated-url":url,
  1854.                               "redirected-url":url,
  1855.                               "content-type": defaultMimeType,
  1856.                            })
  1857.     else:
  1858.         client = clientClass(url, callback, errback, headerCallback,
  1859.             bodyDataCallback, method, start, etag, modified, cookies, postVariables, postFiles)
  1860.         client.startRequest()
  1861.         return client
  1862.  
  1863. class HTTPHeaderGrabber(HTTPClient):
  1864.     """Modified HTTPClient to get the headers for a URL.  It tries to do a
  1865.     HEAD request, then falls back on doing a GET request, and closing the
  1866.     connection right after the headers.
  1867.     """
  1868.  
  1869.     def __init__(self, url, callback, errback):
  1870.         """HTTPHeaderGrabber support a lot less features than a real
  1871.         HTTPClient, mostly this is because they don't make sense in this
  1872.         context."""
  1873.         HTTPClient.__init__(self, url, callback, errback)
  1874.     
  1875.     def startRequest(self):
  1876.         self.method = "HEAD"
  1877.         HTTPClient.startRequest(self)
  1878.  
  1879.     def errbackIntercept(self, error):
  1880.         if self.method == 'HEAD' and not self.cancelled:
  1881.             self.method = "GET"
  1882.             HTTPClient.startRequest(self)
  1883.         else:
  1884.             HTTPClient.errbackIntercept(self, error)
  1885.  
  1886.     def callbackIntercept(self, response):
  1887.         # we send the callback for GET requests during the headers
  1888.         if self.method != 'GET' or self.willHandleResponse:
  1889.             HTTPClient.callbackIntercept(self, response)
  1890.  
  1891.     def onHeaders(self, headers):
  1892.         HTTPClient.onHeaders(self, headers)
  1893.         if (self.method == 'GET' and not self.willHandleResponse):
  1894.             headers['body'] = '' 
  1895.             # make it match the behaviour of a HEAD request
  1896.             self.callback(self.prepareResponse(headers))
  1897.             self.cancel()
  1898.  
  1899. def grabHeaders (url, callback, errback,  clientClass = HTTPHeaderGrabber):
  1900.     client = clientClass(url, callback, errback)
  1901.     client.startRequest()
  1902.     return client
  1903.