1 # Copyright (C) 2001-2006 Python Software Foundation
2 # Author: Barry Warsaw
3 # Contact: email-sig@python.org
4 
5 """Miscellaneous utilities."""
6 
7 __all__ = [
8     'collapse_rfc2231_value',
9     'decode_params',
10     'decode_rfc2231',
11     'encode_rfc2231',
12     'formataddr',
13     'formatdate',
14     'getaddresses',
15     'make_msgid',
16     'parseaddr',
17     'parsedate',
18     'parsedate_tz',
19     'unquote',
20     ]
21 
22 import os
23 import re
24 import time
25 import base64
26 import random
27 import socket
28 import urllib
29 import warnings
30 from cStringIO import StringIO
31 
32 from email._parseaddr import quote
33 from email._parseaddr import AddressList as _AddressList
34 from email._parseaddr import mktime_tz
35 
# We need workarounds for bugs in these methods in older Pythons (see below)
37 from email._parseaddr import parsedate as _parsedate
38 from email._parseaddr import parsedate_tz as _parsedate_tz
39 
40 from quopri import decodestring as _qdecode
41 
42 # Intrapackage imports
43 from email.encoders import _bencode, _qencode
44 
# String constants.
COMMASPACE = ', '
EMPTYSTRING = ''
UEMPTYSTRING = u''
CRLF = '\r\n'
TICK = "'"

# specialsre matches any one of the characters ] [ \ ( ) < > @ , : ; " .
# formataddr() uses it to decide whether a real name must be double-quoted.
# escapesre matches any one of the characters ] [ \ ( ) "
# formataddr() uses it to find the characters that get a backslash prefix.
specialsre = re.compile(r'[][\\()<>@,:;".]')
escapesre = re.compile(r'[][\\()"]')
53 
54 
55 
56 # Helpers
57 
def _identity(s):
    """Return s unchanged (a no-op transformation)."""
    return s
60 
61 
def _bdecode(s):
    """Decode the base64-encoded string s and return the decoded data.

    A false s (e.g. the empty string) is returned unchanged.  Input that is
    not valid base64 raises binascii.Error.

    The decoded data is returned exactly as decoded.  Earlier code removed a
    trailing newline from the result whenever s itself did not end in one,
    which silently corrupted payloads whose real data ends with a newline:
    base64 decoding never adds a "courtesy newline" (only encoding does), so
    there is nothing to strip.
    """
    # binascii.a2b_base64() is the decoder that base64.decodestring() wraps,
    # so the result and the exception raised on bad input are the same.
    import binascii
    if not s:
        return s
    return binascii.a2b_base64(s)
71 
72 
73 
def fix_eols(s):
    r"""Return s with every line ending normalized to \r\n."""
    # Pass 1: a bare LF (one not already preceded by a CR) becomes CRLF.
    without_bare_lf = re.sub(r'(?<!\r)\n', CRLF, s)
    # Pass 2: a bare CR (one not followed by an LF) becomes CRLF.
    return re.sub(r'\r(?!\n)', CRLF, without_bare_lf)
81 
82 
83 
def formataddr(pair):
    """Build an address header value from a (realname, email_address) pair.

    This is the inverse of parseaddr(): the result is a string suitable for
    an RFC 2822 From, To or Cc header.

    When the real name is false, the email address alone is returned
    unmodified.  Otherwise the result is 'realname <email_address>', where
    the real name has its escapable characters backslash-escaped and is
    wrapped in double quotes if it contains any special character.
    """
    realname, email_address = pair
    if not realname:
        return email_address
    # Decide on quoting from the raw name, before any escaping is applied.
    if specialsre.search(realname):
        wrapper = '"'
    else:
        wrapper = ''
    escaped = escapesre.sub(r'\\\g<0>', realname)
    return '%s%s%s <%s>' % (wrapper, escaped, wrapper, email_address)
100 
101 
102 
def getaddresses(fieldvalues):
    """Return the list of (REALNAME, EMAIL) pairs found in fieldvalues.

    The field values are joined with ', ' into a single string, which is
    then parsed as one address list.
    """
    joined = COMMASPACE.join(fieldvalues)
    return _AddressList(joined).addresslist
108 
109 
110 
# Regular expression matching an encoded word of the form
# =?charset?encoding?atom?= (the RFC 2047 encoded-word layout), where the
# encoding is "q" or "b" in either case.  The named groups 'charset',
# 'encoding' and 'atom' expose the three parts.
ecre = re.compile(r'''
  =\?                   # literal =?
  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
  \?                    # literal ?
  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
  \?                    # literal ?
  (?P<atom>.*?)         # non-greedy up to the next ?= is the atom
  \?=                   # literal ?=
  ''', re.VERBOSE | re.IGNORECASE)
120 
121 
122 
def formatdate(timeval=None, localtime=False, usegmt=False):
    """Return a date string in RFC 2822 format, e.g.:

    Fri, 09 Nov 2001 01:08:47 -0000

    timeval, when given, is a floating point time value as accepted by
    gmtime() and localtime(); when it is None the current time is used.

    localtime, when true, makes the result relative to the local timezone
    (taking daylight savings time into account) instead of UTC.

    usegmt, when true, writes the zone as the ASCII string "GMT" rather than
    the numeric "-0000".  This is needed for HTTP, and it is only consulted
    when localtime is false.
    """
    # strftime() is deliberately avoided: it honors the locale, whereas RFC
    # 2822 requires the English day and month abbreviations.
    day_names = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')
    month_names = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                   'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')
    if timeval is None:
        timeval = time.time()
    if not localtime:
        stamp = time.gmtime(timeval)
        # UTC is written either symbolically or as -0000.
        if usegmt:
            zone = 'GMT'
        else:
            zone = '-0000'
    else:
        stamp = time.localtime(timeval)
        # Use the DST offset only if the local zone defines DST and it is in
        # effect for this particular time.
        if time.daylight and stamp[-1]:
            offset = time.altzone
        else:
            offset = time.timezone
        hours, leftover_seconds = divmod(abs(offset), 3600)
        # offset counts seconds west of UTC while the header zone is written
        # east of UTC, so a positive offset gets a minus sign.
        sign = '+'
        if offset > 0:
            sign = '-'
        zone = '%s%02d%02d' % (sign, hours, leftover_seconds // 60)
    year, month, day, hour, minute, second, weekday = stamp[:7]
    return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
        day_names[weekday], day, month_names[month - 1],
        year, hour, minute, second, zone)
173 
174 
175 
def make_msgid(idstring=None):
    """Return a string suitable for an RFC 2822 compliant Message-ID, e.g:

    <20020201195627.33539.96671@nightshade.la.mastaler.com>

    The id is built from the current UTC time, the process id, a random
    number below 100000 and the fully qualified host name.  The optional
    idstring, when given, is inserted (after a dot) just before the '@' to
    strengthen the uniqueness of the message id.
    """
    now = time.time()
    stamp = time.strftime('%Y%m%d%H%M%S', time.gmtime(now))
    process_id = os.getpid()
    nonce = random.randrange(100000)
    if idstring is None:
        suffix = ''
    else:
        suffix = '.' + idstring
    host = socket.getfqdn()
    return '<%s.%s.%s%s@%s>' % (stamp, process_id, nonce, suffix, host)
195 
196 
197 
# These functions are in the standalone mimelib version only because they've
# subsequently been fixed in the latest Python versions.  We use this to work
# around broken older Pythons.
def parsedate(data):
    """Parse the date string data, returning None when data is false.

    Non-empty input is handed to email._parseaddr.parsedate() and its result
    is returned as is.
    """
    if data:
        return _parsedate(data)
    return None
205 
206 
def parsedate_tz(data):
    """Parse the date string data, returning None when data is false.

    Non-empty input is handed to email._parseaddr.parsedate_tz() and its
    result is returned as is.
    """
    if data:
        return _parsedate_tz(data)
    return None
211 
212 
def parseaddr(addr):
    """Parse addr and return its first address as a (realname, email) pair.

    When no address can be found in addr, the pair ('', '') is returned.
    """
    parsed = _AddressList(addr).addresslist
    if parsed:
        return parsed[0]
    return '', ''
218 
219 
220 # rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3.
def unquote(str):
    """Remove the enclosing quotes from a string.

    A string wrapped in double quotes loses them and has its backslash
    escapes for backslash and double quote undone.  A string wrapped in
    angle brackets just loses the brackets.  Anything else, including any
    string shorter than two characters, is returned unchanged.
    """
    # A single character cannot be both an opening and a closing delimiter.
    if len(str) <= 1:
        return str
    if str.startswith('"') and str.endswith('"'):
        inner = str[1:-1]
        inner = inner.replace('\\\\', '\\')
        return inner.replace('\\"', '"')
    if str.startswith('<') and str.endswith('>'):
        return str[1:-1]
    return str
229 
230 
231 
232 # RFC2231-related functions - parameter encoding and decoding
def decode_rfc2231(s):
    """Split an RFC 2231 value into its charset, language and text parts.

    s is split on the first two single quotes.  When both are present the
    three resulting pieces are returned as a list [charset, language, text].
    Otherwise the tuple (None, None, s) is returned.  No %-decoding is done.
    """
    pieces = s.split(TICK, 2)
    if len(pieces) > 2:
        return pieces
    return None, None, s
239 
240 
def encode_rfc2231(s, charset=None, language=None):
    """Encode string according to RFC 2231.

    s is always %-quoted, with no character treated as safe.  If neither
    charset nor language is given, the quoted string is returned on its own.
    Otherwise the result is "charset'language'quoted-s", where a charset or
    language that was not given is written as the empty string; the two
    single quote delimiters are always present.
    """
    s = urllib.quote(s, safe='')
    if charset is None and language is None:
        return s
    # RFC 2231 lets either field be blank.  Without this, a missing charset
    # would be formatted as the literal text "None".
    if charset is None:
        charset = ''
    if language is None:
        language = ''
    return "%s'%s'%s" % (charset, language, s)
255 
256 
# Matches a parameter name that uses the RFC 2231 '*' notation: a word
# ('name' group), a '*', then optionally a decimal section number ('num'
# group) which may itself be followed by another '*'.  Examples of names it
# matches: title*, title*0, title*0*.
rfc2231_continuation = re.compile(r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$')
258 
def decode_params(params):
    """Decode parameters list according to RFC 2231.

    params is a sequence of 2-tuples containing (param name, string value).
    It may be any sequence (list, tuple, ...) and is never modified.

    Returns a new list of 2-tuples.  The first parameter is passed through
    untouched.  Every other parameter whose name does not use the '*'
    notation has its value unquoted, passed through quote() and wrapped in
    double quotes.  Parameters that do use the '*' notation are collected
    per base name, joined in numerical order and appended after the plain
    ones: as a double-quoted string, or, when any segment's name ends in
    '*', as a (charset, language, double-quoted string) 3-tuple.

    An empty params yields an empty list.
    """
    # Take a private copy.  list() also accepts tuples and other sequences,
    # which the documented contract allows.
    params = list(params)
    if not params:
        return []
    # The first entry is kept as is.
    name, value = params[0]
    new_params = [(name, value)]
    # Map parameter's name to a list of continuations.  The values are a
    # 3-tuple of the continuation number, the string value, and a flag
    # specifying whether a particular segment is %-encoded.
    rfc2231_params = {}
    for name, value in params[1:]:
        # A trailing '*' on the name marks a %-encoded segment.
        encoded = name.endswith('*')
        value = unquote(value)
        mo = rfc2231_continuation.match(name)
        if mo:
            name, num = mo.group('name', 'num')
            if num is not None:
                num = int(num)
            rfc2231_params.setdefault(name, []).append((num, value, encoded))
        else:
            new_params.append((name, '"%s"' % quote(value)))
    for name, continuations in rfc2231_params.items():
        value = []
        extended = False
        # Sort by number
        continuations.sort()
        # And now append all values in numerical order, converting
        # %-encodings for the encoded segments.  If any of the
        # continuation names ends in a *, then the entire string, after
        # decoding segments and concatenating, must have the charset and
        # language specifiers at the beginning of the string.
        for num, s, encoded in continuations:
            if encoded:
                s = urllib.unquote(s)
                extended = True
            value.append(s)
        value = quote(EMPTYSTRING.join(value))
        if extended:
            charset, language, value = decode_rfc2231(value)
            new_params.append((name, (charset, language, '"%s"' % value)))
        else:
            new_params.append((name, '"%s"' % value))
    return new_params
311 
def collapse_rfc2231_value(value, errors='replace',
                           fallback_charset='us-ascii'):
    """Collapse a parameter value into a single string.

    value is either a plain string or a tuple whose first item is a charset
    and whose third item is the text.  A plain string is simply unquoted
    and returned.  For a tuple, the unquoted text is decoded to unicode
    using the tuple's charset ('us-ascii' when that is false) with the given
    errors policy; if Python does not know that charset, fallback_charset
    is used instead.
    """
    if not isinstance(value, tuple):
        return unquote(value)
    text = unquote(value[2])
    charset = value[0]
    if not charset:
        charset = 'us-ascii'
    try:
        return unicode(text, charset, errors)
    except LookupError:
        # XXX charset is unknown to Python.
        return unicode(text, fallback_charset, errors)