home *** CD-ROM | disk | FTP | other *** search
- # Copyright (C) 2001 Python Software Foundation
- # Author: barry@zope.com (Barry Warsaw)
-
- """Basic message object for the email package object model.
- """
-
- from __future__ import generators
-
- import re
- import base64
- import quopri
- from cStringIO import StringIO
- from types import ListType
-
- # Intrapackage imports
- import Errors
- import Utils
-
- SEMISPACE = '; '
- paramre = re.compile(r'\s*;\s*')
-
-
-
- class Message:
- """Basic message object for use inside the object tree.
-
- A message object is defined as something that has a bunch of RFC 2822
- headers and a payload. If the body of the message is a multipart, then
- the payload is a list of Messages, otherwise it is a string.
-
- These objects implement part of the `mapping' interface, which assumes
- there is exactly one occurrance of the header per message. Some headers
- do in fact appear multiple times (e.g. Received:) and for those headers,
- you must use the explicit API to set or get all the headers. Not all of
- the mapping methods are implemented.
-
- """
- def __init__(self):
- self._headers = []
- self._unixfrom = None
- self._payload = None
- # Defaults for multipart messages
- self.preamble = self.epilogue = None
-
- def __str__(self):
- """Return the entire formatted message as a string.
- This includes the headers, body, and `unixfrom' line.
- """
- return self.as_string(unixfrom=1)
-
- def as_string(self, unixfrom=0):
- """Return the entire formatted message as a string.
- Optional `unixfrom' when true, means include the Unix From_ envelope
- header.
- """
- from Generator import Generator
- fp = StringIO()
- g = Generator(fp)
- g(self, unixfrom=unixfrom)
- return fp.getvalue()
-
- def is_multipart(self):
- """Return true if the message consists of multiple parts."""
- if type(self._payload) is ListType:
- return 1
- return 0
-
- #
- # Unix From_ line
- #
- def set_unixfrom(self, unixfrom):
- self._unixfrom = unixfrom
-
- def get_unixfrom(self):
- return self._unixfrom
-
- #
- # Payload manipulation.
- #
- def add_payload(self, payload):
- """Add the given payload to the current payload.
-
- If the current payload is empty, then the current payload will be made
- a scalar, set to the given value.
- """
- if self._payload is None:
- self._payload = payload
- elif type(self._payload) is ListType:
- self._payload.append(payload)
- elif self.get_main_type() not in (None, 'multipart'):
- raise Errors.MultipartConversionError(
- 'Message main Content-Type: must be "multipart" or missing')
- else:
- self._payload = [self._payload, payload]
-
- # A useful synonym
- attach = add_payload
-
- def get_payload(self, i=None, decode=0):
- """Return the current payload exactly as is.
-
- Optional i returns that index into the payload.
-
- Optional decode is a flag indicating whether the payload should be
- decoded or not, according to the Content-Transfer-Encoding: header.
- When true and the message is not a multipart, the payload will be
- decoded if this header's value is `quoted-printable' or `base64'. If
- some other encoding is used, or the header is missing, the payload is
- returned as-is (undecoded). If the message is a multipart and the
- decode flag is true, then None is returned.
- """
- if i is None:
- payload = self._payload
- elif type(self._payload) is not ListType:
- raise TypeError, i
- else:
- payload = self._payload[i]
- if decode:
- if self.is_multipart():
- return None
- cte = self.get('content-transfer-encoding', '')
- if cte.lower() == 'quoted-printable':
- return Utils._qdecode(payload)
- elif cte.lower() == 'base64':
- return Utils._bdecode(payload)
- # Everything else, including encodings with 8bit or 7bit are returned
- # unchanged.
- return payload
-
-
- def set_payload(self, payload):
- """Set the payload to the given value."""
- self._payload = payload
-
- #
- # MAPPING INTERFACE (partial)
- #
- def __len__(self):
- """Return the total number of headers, including duplicates."""
- return len(self._headers)
-
- def __getitem__(self, name):
- """Get a header value.
-
- Return None if the header is missing instead of raising an exception.
-
- Note that if the header appeared multiple times, exactly which
- occurrance gets returned is undefined. Use getall() to get all
- the values matching a header field name.
- """
- return self.get(name)
-
- def __setitem__(self, name, val):
- """Set the value of a header.
-
- Note: this does not overwrite an existing header with the same field
- name. Use __delitem__() first to delete any existing headers.
- """
- self._headers.append((name, val))
-
- def __delitem__(self, name):
- """Delete all occurrences of a header, if present.
-
- Does not raise an exception if the header is missing.
- """
- name = name.lower()
- newheaders = []
- for k, v in self._headers:
- if k.lower() <> name:
- newheaders.append((k, v))
- self._headers = newheaders
-
- def __contains__(self, key):
- return key.lower() in [k.lower() for k, v in self._headers]
-
- def has_key(self, name):
- """Return true if the message contains the header."""
- missing = []
- return self.get(name, missing) is not missing
-
- def keys(self):
- """Return a list of all the message's header field names.
-
- These will be sorted in the order they appeared in the original
- message, and may contain duplicates. Any fields deleted and
- re-inserted are always appended to the header list.
- """
- return [k for k, v in self._headers]
-
- def values(self):
- """Return a list of all the message's header values.
-
- These will be sorted in the order they appeared in the original
- message, and may contain duplicates. Any fields deleted and
- re-inserted are always appended to the header list.
- """
- return [v for k, v in self._headers]
-
- def items(self):
- """Get all the message's header fields and values.
-
- These will be sorted in the order they appeared in the original
- message, and may contain duplicates. Any fields deleted and
- re-inserted are always appended to the header list.
- """
- return self._headers[:]
-
- def get(self, name, failobj=None):
- """Get a header value.
-
- Like __getitem__() but return failobj instead of None when the field
- is missing.
- """
- name = name.lower()
- for k, v in self._headers:
- if k.lower() == name:
- return v
- return failobj
-
- #
- # Additional useful stuff
- #
-
- def get_all(self, name, failobj=None):
- """Return a list of all the values for the named field.
-
- These will be sorted in the order they appeared in the original
- message, and may contain duplicates. Any fields deleted and
- re-inserted are always appended to the header list.
-
- If no such fields exist, failobj is returned (defaults to None).
- """
- values = []
- name = name.lower()
- for k, v in self._headers:
- if k.lower() == name:
- values.append(v)
- if not values:
- return failobj
- return values
-
- def add_header(self, _name, _value, **_params):
- """Extended header setting.
-
- name is the header field to add. keyword arguments can be used to set
- additional parameters for the header field, with underscores converted
- to dashes. Normally the parameter will be added as key="value" unless
- value is None, in which case only the key will be added.
-
- Example:
-
- msg.add_header('content-disposition', 'attachment', filename='bud.gif')
-
- """
- parts = []
- for k, v in _params.items():
- if v is None:
- parts.append(k.replace('_', '-'))
- else:
- parts.append('%s="%s"' % (k.replace('_', '-'), v))
- if _value is not None:
- parts.insert(0, _value)
- self._headers.append((_name, SEMISPACE.join(parts)))
-
- def get_type(self, failobj=None):
- """Returns the message's content type.
-
- The returned string is coerced to lowercase and returned as a single
- string of the form `maintype/subtype'. If there was no Content-Type:
- header in the message, failobj is returned (defaults to None).
- """
- missing = []
- value = self.get('content-type', missing)
- if value is missing:
- return failobj
- return paramre.split(value)[0].lower()
-
- def get_main_type(self, failobj=None):
- """Return the message's main content type if present."""
- missing = []
- ctype = self.get_type(missing)
- if ctype is missing:
- return failobj
- parts = ctype.split('/')
- if len(parts) > 0:
- return ctype.split('/')[0]
- return failobj
-
- def get_subtype(self, failobj=None):
- """Return the message's content subtype if present."""
- missing = []
- ctype = self.get_type(missing)
- if ctype is missing:
- return failobj
- parts = ctype.split('/')
- if len(parts) > 1:
- return ctype.split('/')[1]
- return failobj
-
- def _get_params_preserve(self, failobj, header):
- # Like get_params() but preserves the quoting of values. BAW:
- # should this be part of the public interface?
- missing = []
- value = self.get(header, missing)
- if value is missing:
- return failobj
- params = []
- for p in paramre.split(value):
- try:
- name, val = p.split('=', 1)
- except ValueError:
- # Must have been a bare attribute
- name = p
- val = ''
- params.append((name, val))
- return params
-
- def get_params(self, failobj=None, header='content-type'):
- """Return the message's Content-Type: parameters, as a list.
-
- The elements of the returned list are 2-tuples of key/value pairs, as
- split on the `=' sign. The left hand side of the `=' is the key,
- while the right hand side is the value. If there is no `=' sign in
- the parameter the value is the empty string. The value is always
- unquoted.
-
- Optional failobj is the object to return if there is no Content-Type:
- header. Optional header is the header to search instead of
- Content-Type:
- """
- missing = []
- params = self._get_params_preserve(missing, header)
- if params is missing:
- return failobj
- return [(k, Utils.unquote(v)) for k, v in params]
-
- def get_param(self, param, failobj=None, header='content-type'):
- """Return the parameter value if found in the Content-Type: header.
-
- Optional failobj is the object to return if there is no Content-Type:
- header. Optional header is the header to search instead of
- Content-Type:
-
- Parameter keys are always compared case insensitively. Values are
- always unquoted.
- """
- if not self.has_key(header):
- return failobj
- for k, v in self._get_params_preserve(failobj, header):
- if k.lower() == param.lower():
- return Utils.unquote(v)
- return failobj
-
- def get_filename(self, failobj=None):
- """Return the filename associated with the payload if present.
-
- The filename is extracted from the Content-Disposition: header's
- `filename' parameter, and it is unquoted.
- """
- missing = []
- filename = self.get_param('filename', missing, 'content-disposition')
- if filename is missing:
- return failobj
- return Utils.unquote(filename.strip())
-
- def get_boundary(self, failobj=None):
- """Return the boundary associated with the payload if present.
-
- The boundary is extracted from the Content-Type: header's `boundary'
- parameter, and it is unquoted.
- """
- missing = []
- boundary = self.get_param('boundary', missing)
- if boundary is missing:
- return failobj
- return Utils.unquote(boundary.strip())
-
- def set_boundary(self, boundary):
- """Set the boundary parameter in Content-Type: to 'boundary'.
-
- This is subtly different than deleting the Content-Type: header and
- adding a new one with a new boundary parameter via add_header(). The
- main difference is that using the set_boundary() method preserves the
- order of the Content-Type: header in the original message.
-
- HeaderParseError is raised if the message has no Content-Type: header.
- """
- missing = []
- params = self._get_params_preserve(missing, 'content-type')
- if params is missing:
- # There was no Content-Type: header, and we don't know what type
- # to set it to, so raise an exception.
- raise Errors.HeaderParseError, 'No Content-Type: header found'
- newparams = []
- foundp = 0
- for pk, pv in params:
- if pk.lower() == 'boundary':
- newparams.append(('boundary', '"%s"' % boundary))
- foundp = 1
- else:
- newparams.append((pk, pv))
- if not foundp:
- # The original Content-Type: header had no boundary attribute.
- # Tack one one the end. BAW: should we raise an exception
- # instead???
- newparams.append(('boundary', '"%s"' % boundary))
- # Replace the existing Content-Type: header with the new value
- newheaders = []
- for h, v in self._headers:
- if h.lower() == 'content-type':
- parts = []
- for k, v in newparams:
- if v == '':
- parts.append(k)
- else:
- parts.append('%s=%s' % (k, v))
- newheaders.append((h, SEMISPACE.join(parts)))
-
- else:
- newheaders.append((h, v))
- self._headers = newheaders
-
- def walk(self):
- """Walk over the message tree, yielding each subpart.
-
- The walk is performed in depth-first order. This method is a
- generator.
- """
- yield self
- if self.is_multipart():
- for subpart in self.get_payload():
- for subsubpart in subpart.walk():
- yield subsubpart
-
- def get_charsets(self, failobj=None):
- """Return a list containing the charset(s) used in this message.
-
- The returned list of items describes the Content-Type: headers'
- charset parameter for this message and all the subparts in its
- payload.
-
- Each item will either be a string (the value of the charset parameter
- in the Content-Type: header of that part) or the value of the
- 'failobj' parameter (defaults to None), if the part does not have a
- main MIME type of "text", or the charset is not defined.
-
- The list will contain one string for each part of the message, plus
- one for the container message (i.e. self), so that a non-multipart
- message will still return a list of length 1.
- """
- return [part.get_param('charset', failobj) for part in self.walk()]
-