Index: src/org/python/core/codecs.java
===================================================================
--- src/org/python/core/codecs.java	(revision 2833)
+++ src/org/python/core/codecs.java	(working copy)
@@ -8,16 +8,29 @@
 
 package org.python.core;
 
+
+
 /**
  * Contains the implementation of the builtin codecs.
  * @since Jython 2.0
  */
 
 public class codecs {
+
+
+    public static final String BACKSLASHREPLACE = "backslashreplace"; // name of a builtin error handler registered by registry_init()
+
+    public static final String IGNORE = "ignore"; // name of a builtin error handler registered by registry_init()
+
+    public static final String REPLACE = "replace"; // name of a builtin error handler registered by registry_init()
+
+    public static final String XMLCHARREFREPLACE = "xmlcharrefreplace"; // name of a builtin error handler registered by registry_init()
+
     private static char Py_UNICODE_REPLACEMENT_CHARACTER = 0xFFFD;
 
-    private static PyList searchPath = new PyList();
-    private static PyStringMap searchCache = new PyStringMap();
+    private static PyList searchPath; // null until registry_init() creates it; doubles as the "initialised" flag
+    private static PyStringMap searchCache; // created by registry_init()
+    private static PyStringMap errorHandlers; // handler name -> handler object; created by registry_init(), filled by register_error()
 
     private static String default_encoding = "ascii";
 
@@ -29,8 +42,30 @@
         lookup(encoding);
         default_encoding = encoding;
     }
+    
+    public static PyObject lookup_error(String handlerName){ // returns the error handler registered under handlerName
+        registry_init(); // make sure the handler table exists before it is read
+        if(handlerName == null){
+            handlerName = "strict"; // a null name selects the "strict" handler
+        }
+        PyObject handler =  (PyObject)errorHandlers.__finditem__(handlerName.intern());
+        if(handler == null){
+            throw new PyException(Py.LookupError, // names with no registered handler raise LookupError
+                                  "unknown error handler name '" + handlerName + "'");
+        }
+        return handler;
+    }
+    
+    public static void register_error(String name, PyObject error){ // stores error as the handler for name
+        registry_init(); // make sure the handler table exists before it is written
+        if (!error.isCallable()) { // non-callable handlers are rejected with TypeError
+            throw Py.TypeError("argument must be callable");
+         }
+        errorHandlers.__setitem__(name.intern(), error); // keyed by the interned name, matching the lookup in lookup_error()
+    }
 
     public static void register(PyObject search_function) {
+        registry_init();
         if (!search_function.isCallable()) {
            throw Py.TypeError("argument must be callable");
         }
@@ -39,7 +74,7 @@
 
 
     public static PyTuple lookup(String encoding) {
-        import_encodings();
+        registry_init();
         PyString v = new PyString(normalizestring(encoding));
         PyObject result = searchCache.__finditem__(v);
         if (result != null) {
@@ -108,15 +143,7 @@
             errors = errors.intern();
         }
 
-        /* Shortcuts for common default encodings */
-/*
-        if (encoding.equals("utf-8"))
-            return utf_8_decode(v, errors).__getitem__(0).__str__();
-        else if (encoding.equals("latin-1"))
-            ; //return PyUnicode_DecodeLatin1(s, size, errors);
-        else if (encoding.equals("ascii"))
-            ; //return PyUnicode_DecodeASCII(s, size, errors);
-*/
+        /* Shortcut for ascii encoding */
         if (encoding.equals("ascii")) {
             return PyUnicode_DecodeASCII(v.toString(),
                                                       v.__len__(), errors);
@@ -159,16 +186,12 @@
             errors = errors.intern();
         }
 
-        /* Shortcuts for common default encodings */
-/*
-        if (encoding.equals("utf-8"))
-            return PyUnicode_DecodeUTF8(v.toString(), v.__len__(), errors);
-        else if (encoding.equals("latin-1"))
-            return PyUnicode_DecodeLatin1(v.toString(), v.__len__(), errors);
-        else
-*/
+        /* Shortcuts for common default encodings.  latin-1 must not use the
+         * lookup registry for the encodings module to work correctly */
+        if (encoding.equals("latin-1")){
+            return PyUnicode_EncodeLatin1(v.toString(), v.__len__(), errors);
 
-        if (encoding.equals("ascii")) {
+        }else if (encoding.equals("ascii")) {
             return PyUnicode_EncodeASCII(v.toString(),
                                                       v.__len__(), errors);
         }
@@ -193,8 +216,465 @@
         PyObject codecs = lookup(encoding);
         return codecs.__getitem__(0);
     }
+    
+    public static PyObject strict_errors(PyObject[] args, String[] kws){ // "strict" handler: never returns normally, always throws
+        ArgParser ap = new ArgParser("strict_errors", args, kws, "exc");
+        PyObject exc = ap.getPyObject(0); // the single argument, named "exc"
+        if(Py.isInstance(exc, Py.UnicodeDecodeError)){ // re-raise exc under whichever Unicode error type it is an instance of
+            throw new PyException(Py.UnicodeDecodeError, exc);
+        }else if(Py.isInstance(exc, Py.UnicodeEncodeError)){
+            throw new PyException(Py.UnicodeEncodeError, exc);
+        }else if(Py.isInstance(exc, Py.UnicodeTranslateError)){
+            throw new PyException(Py.UnicodeTranslateError, exc);
+        }
+        throw wrong_exception_type(exc); // any other argument is reported as a TypeError
+    }
+    
+    public static PyObject ignore_errors(PyObject[] args, String[] kws){ // "ignore" handler for the three Unicode error types
+        ArgParser ap = new ArgParser("ignore_errors", args, kws, "exc");
+        PyObject exc = ap.getPyObject(0); // the single argument, named "exc"
+        if(!isUnicodeError(exc)){ // anything that is not a Unicode error is reported as a TypeError
+            throw wrong_exception_type(exc);
+        }
+        PyObject end = exc.__getattr__("end");
+        return new PyTuple(new PyObject[]{Py.java2py(""), end}); // (empty replacement text, exc.end)
+    }
 
+    private static boolean isUnicodeError(PyObject exc) { // true when exc is a UnicodeDecodeError, UnicodeEncodeError or UnicodeTranslateError instance
+        return Py.isInstance(exc, Py.UnicodeDecodeError) ||
+                Py.isInstance(exc, Py.UnicodeEncodeError) || 
+                Py.isInstance(exc, Py.UnicodeTranslateError);
+    }
+    
+    public static PyObject replace_errors(PyObject[] args, String[] kws){ // "replace" handler: returns (replacement text, exc.end)
+        ArgParser ap = new ArgParser("replace_errors", args, kws, "exc");
+        PyObject exc = ap.getPyObject(0); // the single argument, named "exc"
+        if(Py.isInstance(exc, Py.UnicodeDecodeError)){ // decoding: replacement is U+FFFD
+            PyObject end = exc.__getattr__("end");
+            return new PyTuple(new PyObject[]{new PyUnicode(Py_UNICODE_REPLACEMENT_CHARACTER), end});
+        }else if(Py.isInstance(exc, Py.UnicodeEncodeError)){ // encoding: replacement is one "?", exc.start is not consulted
+            PyObject end = exc.__getattr__("end");
+        return new PyTuple(new PyObject[]{Py.java2py("?"), end});
+        }else if(Py.isInstance(exc, Py.UnicodeTranslateError)){ // translating: replacement is U+FFFD
+            PyObject end = exc.__getattr__("end");
+            return new PyTuple(new PyObject[]{new PyUnicode(Py_UNICODE_REPLACEMENT_CHARACTER), end});
+            }
+        throw wrong_exception_type(exc); // any other argument is reported as a TypeError
+    }
+    
+    public static PyObject xmlcharrefreplace_errors(PyObject[] args, String[] kws){ // "xmlcharrefreplace" handler; accepts UnicodeEncodeError only
+        ArgParser ap = new ArgParser("xmlcharrefreplace_errors", args, kws, "exc");
+        PyObject exc = ap.getPyObject(0); // the single argument, named "exc"
+        if(!Py.isInstance(exc, Py.UnicodeEncodeError)){ // other arguments are reported as a TypeError
+            throw wrong_exception_type(exc);
+        }
+        int start = ((PyInteger)exc.__getattr__("start")).getValue(); // NOTE(review): the casts assume start/end are PyInteger -- confirm
+        int end = ((PyInteger)exc.__getattr__("end")).getValue();
+        String object = exc.__getattr__("object").toString();
+        StringBuffer replacement = new StringBuffer();
+        xmlcharrefreplace_internal(start, end, object, replacement); // appends one &#N; reference per character of object[start, end)
+        return new PyTuple(new PyObject[]{Py.java2py(replacement.toString()), exc.__getattr__("end")}); // (replacement text, exc.end)
+    }
+    
+    public static StringBuffer xmlcharrefreplace(int start, int end, String toReplace){ // returns the &#N; references for toReplace[start, end)
+        StringBuffer replacement = new StringBuffer(); // a fresh buffer is returned on every call
+        xmlcharrefreplace_internal(start, end, toReplace, replacement);
+        return replacement;
+    }
 
+    private static void xmlcharrefreplace_internal(int start, int end, String object, StringBuffer replacement) { // appends "&#<decimal code>;" for each char of object[start, end)
+        for(int i = start; i < end; i++) {
+            replacement.append("&#");
+            char cur = object.charAt(i);
+            int digits; // number of decimal digits needed for cur
+            int base; // place value of the most significant digit
+            if(cur < 10) {
+                digits = 1;
+                base = 1;
+            } else if(cur < 100) {
+                digits = 2;
+                base = 10;
+            } else if(cur < 1000) {
+                digits = 3;
+                base = 100;
+            } else if(cur < 10000) {
+                digits = 4;
+                base = 1000;
+            } else if(cur < 100000) { // a Java char is at most 65535, so this test always succeeds when reached
+                digits = 5;
+                base = 10000;
+            } else if(cur < 1000000) { // not reachable for a 16-bit char
+                digits = 6;
+                base = 100000;
+            } else { // not reachable for a 16-bit char
+                digits = 7;
+                base = 1000000;
+            }
+            while(digits-- > 0) { // emit the digits from most to least significant
+                replacement.append((char)('0' + cur / base));
+                cur %= base;
+                base /= 10;
+            }
+            replacement.append(';');
+        }
+    }
+    
+    private static PyException wrong_exception_type(PyObject exc) { // builds (does not throw) the TypeError used for unsupported handler arguments
+        PyObject excClass = exc.__getattr__("__class__");
+        PyObject className = excClass.__getattr__("__name__");
+        return new PyException(Py.TypeError, "Don't know how to handle "
+                + className + " in error callback"); // the message names the class of exc
+    }
+
+
+    static char hexdigits[] = { // lowercase hex digit for each value 0..15
+                                      '0', '1', '2', '3', '4', '5', '6', '7',
+                                      '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
+                                  };
+    
+    public static PyObject backslashreplace_errors(PyObject[] args, String[] kws){ // "backslashreplace" handler; accepts UnicodeEncodeError only
+        ArgParser ap = new ArgParser("backslashreplace_errors", args, kws, "exc");
+        PyObject exc = ap.getPyObject(0); // the single argument, named "exc"
+        if(!Py.isInstance(exc, Py.UnicodeEncodeError)){ // other arguments are reported as a TypeError
+            throw wrong_exception_type(exc);
+        }
+        int start = ((PyInteger)exc.__getattr__("start")).getValue(); // NOTE(review): the casts assume start/end are PyInteger -- confirm
+        int end = ((PyInteger)exc.__getattr__("end")).getValue();
+        String object = exc.__getattr__("object").toString();
+        StringBuffer replacement = new StringBuffer();
+        backslashreplace_internal(start, end, object, replacement); // appends one backslash escape per character of object[start, end)
+        return new PyTuple(new PyObject[]{Py.java2py(replacement.toString()), exc.__getattr__("end")}); // (replacement text, exc.end)
+    }
+    
+    public static StringBuffer backslashreplace(int start, int end, String toReplace){ // returns the backslash escapes for toReplace[start, end)
+        StringBuffer replacement = new StringBuffer(); // a fresh buffer is returned on every call
+        backslashreplace_internal(start, end, toReplace, replacement);
+        return replacement;
+    }
+
+    private static void backslashreplace_internal(int start, int end, String object, StringBuffer replacement) { // appends a \xNN or \uNNNN escape for each char of object[start, end)
+        for(int i = start; i < end; i++) {
+            replacement.append('\\');
+            char c = object.charAt(i);
+            if(c >= 0x00010000) { // NOTE(review): a Java char never exceeds 0xFFFF, so this \U branch is never taken
+                replacement.append('U');
+                replacement.append(hexdigits[(c >> 28) & 0xf]);
+                replacement.append(hexdigits[(c >> 24) & 0xf]);
+                replacement.append(hexdigits[(c >> 20) & 0xf]);
+                replacement.append(hexdigits[(c >> 16) & 0xf]);
+                replacement.append(hexdigits[(c >> 12) & 0xf]);
+                replacement.append(hexdigits[(c >> 8) & 0xf]);
+            } else if(c >= 0x100) { // four hex digits: \uNNNN (the low two are appended below)
+                replacement.append('u');
+                replacement.append(hexdigits[(c >> 12) & 0xf]);
+                replacement.append(hexdigits[(c >> 8) & 0xf]);
+            } else
+                replacement.append('x'); // unbraced else: only this statement belongs to it
+            replacement.append(hexdigits[(c >> 4) & 0xf]); // the low two hex digits are appended on every path
+            replacement.append(hexdigits[c & 0xf]);
+        }
+    }
+    
+    private static void registry_init(){ // lazily creates the registry tables and registers the builtin error handlers
+        if(searchPath != null) // already set up; also ends the nested call made through register_error() below
+            return;
+        searchPath = new PyList();
+        searchCache = new PyStringMap();
+        errorHandlers = new PyStringMap();
+        String[] builtinErrorHandlers = new String[] {"strict",
+                                                      IGNORE,
+                                                      REPLACE,
+                                                      XMLCHARREFREPLACE,
+                                                      BACKSLASHREPLACE};
+        for(int i = 0; i < builtinErrorHandlers.length; i++) {
+            register_error(builtinErrorHandlers[i],
+                           Py.newJavaFunc(codecs.class, builtinErrorHandlers[i]
+                                   + "_errors")); // handler "x" is bound to the method named x_errors of this class
+        }
+        import_encodings(); // reached only on the first call, after the tables above exist
+    }
+    /* --- UTF-7 Codec -------------------------------------------------------- */
+
+    /* see RFC2152 for details */
+
+
+    public static 
+    char utf7_special[] = {
+        /*
+         * Indicates whether a UTF-7 character is special, i.e. cannot be
+         * directly encoded: 0 - not special, 1 - special,
+         * 2 - whitespace (optional), 3 - RFC2152 Set O (optional).
+         */
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 2, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        2, 3, 3, 3, 3, 3, 3, 0, 0, 0, 3, 1, 0, 0, 0, 1,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0,
+        3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 1, 3, 3, 3,
+        3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 1, 1,
+
+    };
+    
+    private static boolean SPECIAL(char c, boolean encodeO, boolean encodeWS){ // true when c counts as special for UTF-7 under the given options
+    return (c>127 || utf7_special[(c)] == 1) || // above 127 (tested first, so the 128-entry table is never indexed out of range) or class 1
+     (encodeWS && (utf7_special[(c)] == 2)) || // class 2 (whitespace) counts only when encodeWS is set
+     (encodeO && (utf7_special[(c)] == 3)); // class 3 (Set O) counts only when encodeO is set
+    }
+    
+    private static final String B64_CHARS = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; // base64 alphabet; B64() indexes it by 6-bit value
+    
+    private static char B64(int n){  // base64 character for the low 6 bits of n; higher bits are ignored
+        return B64_CHARS.charAt(n & 0x3f);
+    }
+    
+    private static boolean B64CHAR(char c) { // true when c is one of the characters in B64_CHARS
+        return B64_CHARS.indexOf(c) != -1;
+    } 
+    
+    private static int UB64(char c) { // 6-bit value of base64 character c: A-Z -> 0..25, a-z -> 26..51, 0-9 -> 52..61, '+' -> 62, '/' -> 63
+        return ((c) == '+' ? 62 : (c) == '/' ? 63 : (c) >= 'a' ?
+            (c) - 71 : (c) >= 'A' ? (c) - 65 : (c) + 4); // no range check: the result is only meaningful for characters of the base64 alphabet
+    }
+    
+    public static String PyUnicode_DecodeUTF7(String str,
+                                               String errors) {
+        int s = 0;
+        int e = str.length();
+        boolean inShift = false;
+        int bitsInCharsleft = 0;
+        long charsleft = 0;
+        boolean surrogate = false;
+        char highOrderSurrogate = 0;
+        StringBuffer unicode = new StringBuffer(e);
+        while(s < e) {
+            // restart:
+            char ch = str.charAt(s);
+            if(inShift) {
+                if((ch == '-') || !B64CHAR(ch)) {
+                    inShift = false;
+                    s++;
+                    while(bitsInCharsleft >= 16) {
+                        bitsInCharsleft -= 16;
+                        char outCh = (char)((charsleft >> bitsInCharsleft) & 0xffff);
+                        if(surrogate) {
+                            if(0xD800 <= outCh && outCh <= 0xDBFF) {
+                                unicode.append(highOrderSurrogate);
+                                unicode.append(outCh);
+                            } else {
+                                s = codecs.insertReplacementAndGetResume(unicode,
+                                                                         errors,
+                                                                         "utf-16",
+                                                                         str,
+                                                                         s,
+                                                                         s + 1,
+                                                                         "illegal UTF-16 surrogate");
+                            }
+                            surrogate = false;
+                        } else if(0xDC00 <= outCh && outCh <= 0xDFFF) {
+                            surrogate = true;
+                            highOrderSurrogate = outCh;
+                        } else {
+                            unicode.append(outCh);
+                        }
+                    }
+                    if(bitsInCharsleft >= 6) {
+                        /*
+                         * The shift sequence has a partial character in it. If
+                         * bitsleft < 6 then we could just classify it as
+                         * padding but that is not the case here
+                         */
+                        s = insertReplacementAndGetResume(unicode,
+                                                          errors,
+                                                          "utf-7",
+                                                          str,
+                                                          s,
+                                                          s + 1,
+                                                          "partial character in shift sequence");
+                    }
+                    /*
+                     * According to RFC2152 the remaining bits should be zero.
+                     * We choose to signal an error/insert a replacement
+                     * character here so indicate the potential of a misencoded
+                     * character.
+                     */
+                    if(bitsInCharsleft > 0 && ((charsleft << 5 - bitsInCharsleft) & 0x1f) > 0){
+                                s = insertReplacementAndGetResume(unicode,
+                                                                  errors,
+                                                                  "utf-7",
+                                                                  str,
+                                                                  s,
+                                                                  s + 1,
+                                                                  "non-zero padding bits in shift sequence");
+                    }
+                    if(ch == '-') {
+                        if((s < e) && (str.charAt(s) == '-')) {
+                            unicode.append('-');
+                            inShift = true;
+                        }
+                    } else if(SPECIAL(ch, false, false)) {
+                        s = insertReplacementAndGetResume(unicode,
+                                                          errors,
+                                                          "utf-7",
+                                                          str,
+                                                          s,
+                                                          s + 1,
+                                                          "unexpected special character");
+                    } else {
+                        unicode.append(ch);
+                    }
+                } else {
+                    charsleft = (charsleft << 6) | UB64(ch);
+                    bitsInCharsleft += 6;
+                    s++;
+                    while(bitsInCharsleft >= 16) {
+                        bitsInCharsleft -= 16;
+                        char outCh = (char)((charsleft >> bitsInCharsleft) & 0xffff);
+                        if(surrogate) {
+                            if(0xD800 <= outCh && outCh <= 0xDBFF) {
+                                unicode.append(highOrderSurrogate);
+                                unicode.append(outCh);
+                            } else {
+                                s = codecs.insertReplacementAndGetResume(unicode,
+                                                                         errors,
+                                                                         "utf-16",
+                                                                         str,
+                                                                         s,
+                                                                         s + 1,
+                                                                         "illegal UTF-16 surrogate");
+                            }
+                            surrogate = false;
+                        } else if(0xDC00 <= outCh && outCh <= 0xDFFF) {
+                            surrogate = true;
+                            highOrderSurrogate = outCh;
+                        } else {
+                            unicode.append(outCh);
+                        }
+                    }
+                }
+            } else if(ch == '+') {
+                s++;
+                if(s < e && str.charAt(s) == '-') {
+                    s++;
+                    unicode.append('+');
+                } else {
+                    inShift = true;
+                    bitsInCharsleft = 0;
+                }
+            } else if(SPECIAL(ch, false, false)) {
+                s = insertReplacementAndGetResume(unicode,
+                                                  errors,
+                                                  "utf-7",
+                                                  str,
+                                                  s,
+                                                  s + 1,
+                                                  "unexpected special character");
+            } else {
+                unicode.append(ch);
+                s++;
+            }
+            if(inShift && s == e) {
+                s = insertReplacementAndGetResume(unicode,
+                                                  errors,
+                                                  "utf-7",
+                                                  str,
+                                                  s,
+                                                  s,
+                                                  "unterminated shift sequence");
+            }
+        }
+        return unicode.toString();
+    }
+
+
+    public static String PyUnicode_EncodeUTF7(String str, // encodes str as UTF-7 and returns the encoded text
+                       boolean encodeSetO, // passed to SPECIAL: when set, table class 3 (RFC2152 Set O) is base64-encoded too
+                       boolean encodeWhiteSpace, // passed to SPECIAL: when set, table class 2 (whitespace) is base64-encoded too
+                      String errors) // not read anywhere in this method
+    {
+        int size = str.length();
+
+        if (size == 0)
+            return "";
+        boolean inShift = false; // true while a base64 run opened by '+' is still open
+        int bitsleft = 0; // number of low bits of charsleft not yet written out
+         int charsleft = 0;
+
+        StringBuffer v = new StringBuffer();
+
+        for (int i = 0;i < size; ++i) {
+            char ch = str.charAt(i);
+
+            if (!inShift) {
+                if (ch == '+') { // a literal '+' is written as "+-"
+                    v.append('+');
+                    v.append('-');
+                } else if (SPECIAL(ch, encodeSetO, encodeWhiteSpace)) { // open a base64 run with this character
+                    charsleft = ch;
+                    bitsleft = 16;
+                    v.append('+');
+                    while (bitsleft >= 6) { 
+                        v.append(B64(charsleft >> (bitsleft-6))); 
+                        bitsleft -= 6; 
+                    }
+                    inShift = bitsleft > 0;
+                } else {
+                    v.append((char) ch);
+                }
+            } else {
+                if (!SPECIAL(ch, encodeSetO, encodeWhiteSpace)) {
+                    v.append(B64(charsleft << (6-bitsleft))); // flush the pending bits, zero-filled on the right to 6 bits
+                    charsleft = 0;
+                    bitsleft = 0;
+                    /* Characters not in the BASE64 set implicitly unshift the sequence
+                       so no '-' is required, except if the character is itself a '-' */
+                    if (B64CHAR(ch) || ch == '-') {
+                        v.append('-');
+                    }
+                    inShift = false;
+                    v.append( ch);
+                } else {
+                    bitsleft += 16;
+                    charsleft = (charsleft << 16) | ch;    
+                    while (bitsleft >= 6) { 
+                        v.append(B64(charsleft >> (bitsleft-6))); 
+                        bitsleft -= 6; 
+                    }
+                    /* If the next character is special then we don't need to terminate
+                       the shift sequence. If the next character is not a BASE64 character 
+                       or '-' then the shift sequence will be terminated implicitly and we
+                       don't have to insert a '-'. */
+
+                    if (bitsleft == 0) {
+                        if (i + 1 < size) {
+                            char ch2 = str.charAt(i+1);
+
+                            if (SPECIAL(ch2, encodeSetO, encodeWhiteSpace)) {
+                               
+                            } else if (B64CHAR(ch2) || ch2 == '-') {
+                                v.append('-');
+                                inShift = false;
+                            } else {
+                                inShift = false;
+                            }
+
+                        }
+                        else { // last character of the input: close the run explicitly
+                            v.append('-');
+                            inShift = false;
+                        }
+                    }
+                }            
+            }
+        }
+        if (bitsleft > 0) { // bits still pending at end of input: flush them and close the run
+            v.append(B64(charsleft << (6-bitsleft)));
+            v.append('-');
+        }
+        return v.toString();
+    }
+
+    
     /* --- UTF-8 Codec ---------------------------------------------------- */
     private static byte utf8_code_length[] = {
        /* Map UTF-8 encoded prefix byte to sequence length.  zero means
@@ -225,53 +705,41 @@
         /* Unpack UTF-8 encoded data */
         for (int i = 0; i < size; ) {
             int ch = str.charAt(i);
-            if (ch > 0xFF) {
-                codecs.decoding_error("utf-8", unicode, errors,
-                                      "ordinal not in range(255)");
-                i++;
-                continue;
-            }
 
             if (ch < 0x80) {
                 unicode.append((char) ch);
                 i++;
                 continue;
             }
+            if (ch > 0xFF) {
+                i = insertReplacementAndGetResume(unicode, errors, "utf-8", str, i, i + 1, "ordinal not in range(255)");
+                continue;
+            }
 
             int n = utf8_code_length[ch];
 
             if (i + n > size) {
-                codecs.decoding_error("utf-8", unicode, errors,
-                                      "unexpected end of data");
-                i++;
+                i = insertReplacementAndGetResume(unicode, errors, "utf-8", str, i, i + 1, "unexpected end of data");
                 continue;
             }
 
 
             switch (n) {
             case 0:
-                codecs.decoding_error("utf-8", unicode, errors,
-                                      "unexpected code byte");
-                i++;
+                i = insertReplacementAndGetResume(unicode, errors, "utf-8", str, i, i + 1, "unexpected code byte");
                 continue;
             case 1:
-                codecs.decoding_error("utf-8", unicode, errors,
-                                      "internal error");
-                i++;
+                i = insertReplacementAndGetResume(unicode, errors, "utf-8", str, i, i + 1, "internal error");
                 continue;
             case 2:
                 char ch1 = str.charAt(i+1);
                 if ((ch1 & 0xc0) != 0x80) {
-                    codecs.decoding_error("utf-8", unicode, errors,
-                                          "invalid data");
-                    i++;
+                    i = insertReplacementAndGetResume(unicode, errors, "utf-8", str, i, i + 2, "invalid data");
                     continue;
                 }
                 ch = ((ch & 0x1f) << 6) + (ch1 & 0x3f);
                 if (ch < 0x80) {
-                    codecs.decoding_error("utf-8", unicode, errors,
-                                          "illegal encoding");
-                    i++;
+                    i = insertReplacementAndGetResume(unicode, errors, "utf-8", str, i, i + 2, "illegal encoding");
                     continue;
                 } else
                     unicode.append((char) ch);
@@ -281,16 +749,12 @@
                 ch1 = str.charAt(i+1);
                 char ch2 = str.charAt(i+2);
                 if ((ch1 & 0xc0) != 0x80 || (ch2 & 0xc0) != 0x80) {
-                    codecs.decoding_error("utf-8", unicode, errors,
-                                          "invalid data");
-                    i++;
+                    i = insertReplacementAndGetResume(unicode, errors, "utf-8", str, i, i + 3, "invalid data");
                     continue;
                 }
                 ch = ((ch & 0x0f) << 12) + ((ch1 & 0x3f) << 6) + (ch2 & 0x3f);
                 if (ch < 0x800 || (ch >= 0xd800 && ch < 0xe000)) {
-                    codecs.decoding_error("utf-8", unicode, errors,
-                                          "illegal encoding");
-                    i++;
+                    i = insertReplacementAndGetResume(unicode, errors, "utf-8", str, i, i + 3, "illegal encoding");
                     continue;
                 } else
                    unicode.append((char) ch);
@@ -303,9 +767,7 @@
                 if ((ch1 & 0xc0) != 0x80 ||
                     (ch2 & 0xc0) != 0x80 ||
                     (ch3 & 0xc0) != 0x80) {
-                    codecs.decoding_error("utf-8", unicode, errors,
-                                          "invalid data");
-                    i++;
+                    i = insertReplacementAndGetResume(unicode, errors, "utf-8", str, i, i + 4, "invalid data");
                     continue;
                 }
                 ch = ((ch & 0x7) << 18) + ((ch1 & 0x3f) << 12) +
@@ -315,9 +777,7 @@
                                            byte encoding */
                     (ch > 0x10ffff)) {  /* maximum value allowed for
                                            UTF-16 */
-                    codecs.decoding_error("utf-8", unicode, errors,
-                                          "illegal encoding");
-                    i++;
+                    i = insertReplacementAndGetResume(unicode, errors, "utf-8", str, i, i + 4, "illegal encoding");
                     continue;
                 }
                 /*  compute and append the two surrogates: */
@@ -334,9 +794,8 @@
 
             default:
                 /* Other sizes are only needed for UCS-4 */
-                codecs.decoding_error("utf-8", unicode, errors,
-                                      "unsupported Unicode code range");
-                i++;
+                i = insertReplacementAndGetResume(unicode, errors, "utf-8", str, i, i + n, "unsupported Unicode code range");
+                continue;
             }
             i += n;
         }
@@ -380,22 +839,33 @@
     }
 
 
-
-    /* --- 7-bit ASCII Codec -------------------------------------------- */
-
     public static String PyUnicode_DecodeASCII(String str, int size,
                                                String errors)
     {
+        return PyUnicode_DecodeIntLimited(str, size, errors, "ascii", 128); // shared limited-range decoder: characters below 128 are copied, others go to error handling
+    }
+    
+    public static String PyUnicode_DecodeLatin1(String str, int size, // decodes the first size characters of str as latin-1
+                                               String errors)
+    {
+        return PyUnicode_DecodeIntLimited(str, size, errors, "latin-1", 256); // shared limited-range decoder: characters below 256 are copied, others go to error handling
+    }
+    
+    private static String PyUnicode_DecodeIntLimited(String str, int size,String errors, String encoding, int limit){
         StringBuffer v = new StringBuffer(size);
 
+        String reason = "ordinal not in range(" + limit + ")";
         for (int i = 0; i < size; i++) {
             char ch = str.charAt(i);
-            if (ch < 128) {
+            if (ch < limit) {
                 v.append(ch);
             } else {
-                decoding_error("ascii", v, errors,
-                               "ordinal not in range(128)");
-                continue;
+                i = insertReplacementAndGetResume(v,errors, 
+                                            encoding,
+                                            str,
+                                            i,
+                                            i + 1,
+                                            reason) - 1;
             }
         }
 
@@ -406,19 +876,84 @@
     public static String PyUnicode_EncodeASCII(String str, int size,
                                                String errors)
     {
+        return PyUnicode_EncodeIntLimited(str, size, errors, "ascii", 128);
+    }
+
+
+    public static String PyUnicode_EncodeLatin1(String str, int size,
+                                               String errors)
+    {
+
+        return PyUnicode_EncodeIntLimited(str, size, errors, "latin-1", 256);
+    }
+
+
+    private static String PyUnicode_EncodeIntLimited(String str, int size,
+                                               String errors, String encoding, int limit)
+    {
+        String reason = "ordinal not in range(" + limit + ")";
         StringBuffer v = new StringBuffer(size);
-
-        for (int i = 0; i < size; i++) {
+        for(int i = 0; i < size; i++) {
             char ch = str.charAt(i);
-            if (ch >= 128) {
-                encoding_error("ascii", v, errors,
-                               "ordinal not in range(128)");
+            if(ch >= limit) {
+                int nextGood = i + 1;
+                for(; nextGood < size; nextGood++) {
+                    if(str.charAt(nextGood) < limit) {
+                        break;
+                    }
+                }
+                if(errors != null) {
+                    if(errors.equals(IGNORE)) {
+                        i = nextGood - 1;
+                        continue;
+                    } else if(errors.equals(REPLACE)) {
+                        for(int j = i; j < nextGood; j++) {
+                            v.append('?');
+                        }
+                        i = nextGood - 1;
+                        continue;
+                    } else if(errors.equals(XMLCHARREFREPLACE)) {
+                        v.append(xmlcharrefreplace(i, nextGood, str));
+                        i = nextGood - 1;
+                        continue;
+                    } else if(errors.equals(BACKSLASHREPLACE)) {
+                        v.append(backslashreplace(i, nextGood, str));
+                        i = nextGood - 1;
+                        continue;
+                    }
+                }
+                PyObject replacement = encoding_error(errors,
+                                                      encoding,
+                                                      str,
+                                                      i,
+                                                      nextGood,
+                                                      reason);
+                String replStr = replacement.__getitem__(0).toString();
+                for(int j = 0; j < replStr.length(); j++) {
+                    if(replStr.charAt(j) >= limit) {
+                        throw Py.UnicodeEncodeError(encoding, str, i + j, i + j
+                                + 1, reason);
+                    }
+                }
+                v.append(replStr);
+                i = calcNewPosition(size, replacement) - 1;
             } else {
                 v.append(ch);
             }
         }
         return v.toString();
     }
+    
+    /** Resolves the position in errorTuple[1] to an index within 0..size;
+     *  a negative position counts back from size. Raises IndexError otherwise. */
+    public static int calcNewPosition(int size, PyObject errorTuple) {
+        int raw = ((PyInteger)errorTuple.__getitem__(1)).getValue();
+        int newPosition = raw < 0 ? size + raw : raw;
+        if(newPosition < 0 || newPosition > size) {
+            throw Py.IndexError(newPosition + " out of bounds of encoded string");
+        }
+        return newPosition;
+    }
 
 
 
@@ -457,85 +992,128 @@
     {
         int size = str.length();
         StringBuffer v = new StringBuffer(size);
-
-        for (int i = 0; i < size; ) {
+        for(int i = 0; i < size;) {
             char ch = str.charAt(i);
-
             /* Non-escape characters are interpreted as Unicode ordinals */
-            if (ch != '\\') {
+            if(ch != '\\') {
                 v.append(ch);
                 i++;
                 continue;
             }
-
-            /* \\u-escapes are only interpreted iff the number of leading
-               backslashes is odd */
+            /*
+             * \\u-escapes are only interpreted iff the number of leading
+             * backslashes is odd
+             */
             int bs = i;
-            while (i < size) {
+            while(i < size) {
                 ch = str.charAt(i);
-                if (ch != '\\')
+                if(ch != '\\')
                     break;
                 v.append(ch);
                 i++;
             }
-            if (((i - bs) & 1) == 0 || i >= size || ch != 'u') {
+            if(((i - bs) & 1) == 0 || i >= size || ch != 'u') {
                 continue;
             }
             v.setLength(v.length() - 1);
             i++;
-
             /* \\uXXXX with 4 hex digits */
-            int x = 0;
-            for (int j = 0; j < 4; j++) {
-                ch = str.charAt(i+j);
-                int d  = Character.digit(ch, 16);
-                if (d == -1) {
-                    codecs.decoding_error("unicode escape", v, errors,
-                                          "truncated \\uXXXX");
+            int x = 0, d = 0, j = 0;
+            for(; j < 4; j++) {
+                ch = str.charAt(i + j);
+                d = Character.digit(ch, 16);
+                if(d == -1) {
                     break;
                 }
-                x = ((x<<4) & ~0xF) + d;
+                x = ((x << 4) & ~0xF) + d;
             }
-            i += 4;
-            v.append((char) x);
-       }
-       return v.toString();
+            if(d == -1) {
+                i = codecs.insertReplacementAndGetResume(v,
+                                                         errors,
+                                                         "unicodeescape",
+                                                         str,
+                                                         bs,
+                                                         i + j,
+                                                         "truncated \\uXXXX");
+            } else {
+                i += 4;
+                v.append((char)x);
+            }
+        }
+        return v.toString();
     }
 
-
     /* --- Utility methods -------------------------------------------- */
+    /**
+     * Hands an unencodable range (start..end of toEncode) to the error
+     * handler registered under errors; lookup_error treats null as "strict".
+     * Returns the handler's (replacement, new position) tuple, raising
+     * TypeError if the handler returned anything else.
+     */
+    public static PyObject encoding_error(String errors, String encoding,
+                                          String toEncode, int start, int end,
+                                          String reason) {
+        PyObject handler = lookup_error(errors);
+        PyException failure = Py.UnicodeEncodeError(encoding, toEncode, start,
+                                                    end, reason);
+        failure.instantiate();
+        PyObject result = handler.__call__(new PyObject[] {failure.value});
+        checkErrorHandlerReturn(errors, result);
+        return result;
+    }
 
-    public static void encoding_error(String type, StringBuffer dest,
-                                      String errors, String details)
-    {
-        if (errors == null || errors == "strict") {
-            throw Py.UnicodeError(type + " encoding error: " + details);
-        } else if (errors == "ignore") {
-            //ignore
-        } else if (errors == "replace") {
-            dest.append('?');
-        } else {
-            throw Py.ValueError(type + " encoding error; "+
-                                "unknown error handling code: " + errors);
+    /**
+     * Deals with an undecodable range start..end of toDecode. "ignore"
+     * appends nothing and "replace" appends one U+FFFD per position in the
+     * range; any other handler name, or null, goes through decoding_error and
+     * the replacement text it returns is appended to partialDecode.
+     * @return the index in toDecode at which the caller should resume
+     */
+    public static int insertReplacementAndGetResume(StringBuffer partialDecode,
+                                                    String errors,
+                                                    String encoding,
+                                                    String toDecode,
+                                                    int start,
+                                                    int end,
+                                                    String reason) {
+        if(IGNORE.equals(errors)) {
+            return end;
+        }
+        if(REPLACE.equals(errors)) {
+            for(int pos = start; pos < end; pos++) {
+                partialDecode.append(Py_UNICODE_REPLACEMENT_CHARACTER);
+            }
+            return end;
         }
+        PyObject result = decoding_error(errors, encoding, toDecode, start, end, reason);
+        checkErrorHandlerReturn(errors, result);
+        partialDecode.append(result.__getitem__(0).toString());
+        return calcNewPosition(toDecode.length(), result);
     }
 
+    /**
+     * Hands an undecodable range (start..end of toEncode, which here is the
+     * input being decoded) to the error handler registered under errors;
+     * lookup_error treats null as "strict". The handler's return value is
+     * passed back as-is, without validation.
+     */
+    public static PyObject decoding_error(String errors, String encoding,
+                                          String toEncode, int start, int end,
+                                          String reason) {
+        PyObject handler = lookup_error(errors);
+        PyException failure = Py.UnicodeDecodeError(encoding, toEncode, start,
+                                                    end, reason);
+        failure.instantiate();
+        return handler.__call__(new PyObject[] {failure.value});
+    }
 
-    public static void decoding_error(String type, StringBuffer dest,
-                                      String errors, String details)
-    {
-        if (errors == null || errors == "strict") {
-            throw Py.UnicodeError(type + " decoding error: " + details);
+    private static void checkErrorHandlerReturn(String errors,
+                                                PyObject replacement) {
+        if(!(replacement instanceof PyTuple) || replacement.__len__() != 2
+                || !(replacement.__getitem__(0) instanceof PyBaseString)
+                || !(replacement.__getitem__(1) instanceof PyInteger)) {
+            throw new PyException(Py.TypeError, "error_handler " + errors
+                    + " must return a tuple of (replacement, new position)");
         }
-        else if (errors == "ignore") {
-            //ignore
-        } else if (errors == "replace") {
-            if (dest != null) {
-                dest.append(Py_UNICODE_REPLACEMENT_CHARACTER);
-            }
-        } else {
-            throw Py.ValueError(type + " decoding error; "+
-                                "unknown error handling code: " + errors);
-        }
     }
 }
Index: src/org/python/core/PyString.java
===================================================================
--- src/org/python/core/PyString.java	(revision 2833)
+++ src/org/python/core/PyString.java	(working copy)
@@ -1614,7 +1614,7 @@
             public PyObject __call__(PyObject arg0) {
                 String result=self.str_join(arg0);
                 //XXX: do we really need to check self?
-                if (self instanceof PyUnicode||arg0 instanceof PyUnicode) {
+                if (self instanceof PyUnicode||(arg0.__len__() > 0 && arg0.__getitem__(0) instanceof PyUnicode)) {
                     return new PyUnicode(result);
                 } else {
                     return new PyString(result);
@@ -1625,7 +1625,8 @@
                 PyString self=(PyString)gself;
                 String result=self.str_join(arg0);
                 //XXX: do we really need to check self?
-                if (self instanceof PyUnicode||arg0 instanceof PyUnicode) {
+                if (self instanceof PyUnicode
+                        ||(arg0.__len__() > 0 && arg0.__getitem__(0) instanceof PyUnicode)) {
                     return new PyUnicode(result);
                 } else {
                     return new PyString(result);
@@ -2926,7 +2927,6 @@
         StringBuffer v = new StringBuffer(str.length());
 
         char quote = 0;
-        boolean unicode = false;
 
         if (use_quotes) {
             quote = str.indexOf('\'') >= 0 &&
@@ -2940,13 +2940,32 @@
             if (use_quotes && (ch == quote || ch == '\\')) {
                 v.append('\\');
                 v.append((char) ch);
+                continue;
             }
+                /* Map UTF-16 surrogate pairs to Unicode \UXXXXXXXX escapes */
+                else if (ch >= 0xD800 && ch < 0xDC00) {
+                    char ch2 = str.charAt(i++);
+                    size--;
+                    if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {
+                    int ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000;
+                    v.append('\\');
+                    v.append('U');
+                    v.append(hexdigit[(ucs >> 28) & 0xf]);
+                    v.append(hexdigit[(ucs >> 24) & 0xf]);
+                    v.append(hexdigit[(ucs >> 20) & 0xf]);
+                    v.append(hexdigit[(ucs >> 16) & 0xf]);
+                    v.append(hexdigit[(ucs >> 12) & 0xf]);
+                    v.append(hexdigit[(ucs >> 8) & 0xf]);
+                    v.append(hexdigit[(ucs >> 4) & 0xf]);
+                    v.append(hexdigit[ucs & 0xf]);
+                    continue;
+                    }
+                    /* Fall through: isolated surrogates are copied as-is */
+                    i--;
+                    size++;
+                }
             /* Map 16-bit characters to '\\uxxxx' */
-            else if (ch >= 256) {
-                if (use_quotes && !unicode) {
-                   v.insert(0, 'u');
-                   unicode = true;
-                }
+             if (ch >= 256) {
                 v.append('\\');
                 v.append('u');
                 v.append(hexdigit[(ch >> 12) & 0xf]);
@@ -2961,9 +2980,10 @@
             else if (use_quotes && ch == '\f') v.append("\\f");
             else if (use_quotes && ch == '\r') v.append("\\r");
             else if (ch < ' ' || ch >= 127) {
-                v.append("\\x");
-                v.append(hexdigit[(ch >> 4) & 0xF]);
-                v.append(hexdigit[ch & 0xF]);
+                v.append('\\');
+                v.append('x');
+                v.append(hexdigit[(ch >> 4) & 0xf]);
+                v.append(hexdigit[ch & 0xf]);
             }
             /* Copy everything else as-is */
             else
@@ -2976,177 +2996,265 @@
 
     private static ucnhashAPI pucnHash = null;
 
-    public static String decode_UnicodeEscape(String str, int start, int end,
-                                              String errors, boolean unicode)
-    {
-        StringBuffer v = new StringBuffer(end-start);
-        for (int s = start; s < end; ) {
+    
+    public static String decode_UnicodeEscape(String str,
+                                              int start,
+                                              int end,
+                                              String errors,
+                                              boolean unicode) {
+        StringBuffer v = new StringBuffer(end - start);
+        for(int s = start; s < end;) {
             char ch = str.charAt(s);
-
             /* Non-escape characters are interpreted as Unicode ordinals */
-            if (ch != '\\') {
+            if(ch != '\\') {
                 v.append(ch);
                 s++;
                 continue;
             }
-
+            int loopStart = s;
             /* \ - Escapes */
             s++;
+            if(s == end) {
+                s = codecs.insertReplacementAndGetResume(v,
+                                                         errors,
+                                                         "unicodeescape",
+                                                         str,
+                                                         loopStart,
+                                                         s + 1,
+                                                         "\\ at end of string");
+                continue;
+            }
             ch = str.charAt(s++);
-            switch (ch) {
-
-            /* \x escapes */
-            case '\n': break;
-            case '\\': v.append('\\'); break;
-            case '\'': v.append('\''); break;
-            case '\"': v.append('\"'); break;
-            case 'b': v.append('\b'); break;
-            case 'f': v.append('\014'); break; /* FF */
-            case 't': v.append('\t'); break;
-            case 'n': v.append('\n'); break;
-            case 'r': v.append('\r'); break;
-            case 'v': v.append('\013'); break; /* VT */
-            case 'a': v.append('\007'); break; /* BEL, not classic C */
-
-            /* \OOO (octal) escapes */
-            case '0': case '1': case '2': case '3':
-            case '4': case '5': case '6': case '7':
-
-                int x = Character.digit(ch, 8);
-                for (int j = 0; j < 2 && s < end; j++, s++) {
-                    ch = str.charAt(s);
-                    if (ch < '0' || ch > '7')
+            switch(ch){
+                /* \x escapes */
+                case '\n':
+                    break;
+                case '\\':
+                    v.append('\\');
+                    break;
+                case '\'':
+                    v.append('\'');
+                    break;
+                case '\"':
+                    v.append('\"');
+                    break;
+                case 'b':
+                    v.append('\b');
+                    break;
+                case 'f':
+                    v.append('\014');
+                    break; /* FF */
+                case 't':
+                    v.append('\t');
+                    break;
+                case 'n':
+                    v.append('\n');
+                    break;
+                case 'r':
+                    v.append('\r');
+                    break;
+                case 'v':
+                    v.append('\013');
+                    break; /* VT */
+                case 'a':
+                    v.append('\007');
+                    break; /* BEL, not classic C */
+                /* \OOO (octal) escapes */
+                case '0':
+                case '1':
+                case '2':
+                case '3':
+                case '4':
+                case '5':
+                case '6':
+                case '7':
+                    int x = Character.digit(ch, 8);
+                    for(int j = 0; j < 2 && s < end; j++, s++) {
+                        ch = str.charAt(s);
+                        if(ch < '0' || ch > '7')
+                            break;
+                        x = (x << 3) + Character.digit(ch, 8);
+                    }
+                    v.append((char)x);
+                    break;
+                case 'x':
+                    s = hexescape(v, errors, 2, s, str, end, "truncated \\xXX");
+                    break;
+                case 'u':
+                    if(!unicode) {
+                        v.append('\\');
+                        v.append('u');
                         break;
-                    x = (x<<3) + Character.digit(ch, 8);
-                }
-                v.append((char) x);
-                break;
-
-            case 'x':
-                int i;
-                for (x = 0, i = 0; i < 2 && s < end; i++) {
-                    ch = str.charAt(s + i);
-                    int d = Character.digit(ch, 16);
-                    if (d == -1) {
-                        codecs.decoding_error("unicode escape", v, errors,
-                                                     "truncated \\xXX");
-                        i++;
+                    }
+                    s = hexescape(v,
+                                  errors,
+                                  4,
+                                  s,
+                                  str,
+                                  end,
+                                  "truncated \\uXXXX");
+                    break;
+                case 'U':
+                    if(!unicode) {
+                        v.append('\\');
+                        v.append('U');
                         break;
                     }
-
-                    x = ((x<<4) & ~0xF) + d;
-                }
-                s += i;
-                v.append((char) x);
-                break;
-
-            /* \ uXXXX with 4 hex digits */
-            case 'u':
-                if (!unicode) {
-                    v.append('\\');
-                    v.append('u');
+                    s = hexescape(v,
+                                  errors,
+                                  8,
+                                  s,
+                                  str,
+                                  end,
+                                  "truncated \\UXXXXXXXX");
                     break;
-                }
-                if (s+4 > end) {
-                    codecs.decoding_error("unicode escape", v, errors,
-                                              "truncated \\uXXXX");
-                    break;
-                }
-                for (x = 0, i = 0; i < 4; i++) {
-                    ch = str.charAt(s + i);
-                    int d  = Character.digit(ch, 16);
-                    if (d == -1) {
-                        codecs.decoding_error("unicode escape", v, errors,
-                                              "truncated \\uXXXX");
+                case 'N':
+                    if(!unicode) {
+                        v.append('\\');
+                        v.append('N');
                         break;
                     }
-                    x = ((x<<4) & ~0xF) + d;
-                }
-                s += i;
-                v.append((char) x);
-                break;
-
-            case 'N':
-                if (!unicode) {
-                    v.append('\\');
-                    v.append('N');
-                    break;
-                }
-                /* Ok, we need to deal with Unicode Character Names now,
-                 * make sure we've imported the hash table data...
-                 */
-                if (pucnHash == null) {
-                     PyObject mod = imp.importName("ucnhash", true);
-                     mod = mod.__call__();
-                     pucnHash = (ucnhashAPI) mod.__tojava__(Object.class);
-                     if (pucnHash.getCchMax() < 0)
-                         codecs.decoding_error("unicode escape", v, errors,
-                                 "Unicode names not loaded");
-                }
-
-                if (str.charAt(s) == '{') {
-                    int startName = s + 1;
-                    int endBrace = startName;
-
-                    /* look for either the closing brace, or we
-                     * exceed the maximum length of the unicode
-                     * character names
+                    /*
+                     * Ok, we need to deal with Unicode Character Names now,
+                     * make sure we've imported the hash table data...
                      */
-                    int maxLen = pucnHash.getCchMax();
-                    while (endBrace < end && str.charAt(endBrace) != '}'
-                           && (endBrace - startName) <= maxLen) {
-                        endBrace++;
+                    if(pucnHash == null) {
+                        PyObject mod = imp.importName("ucnhash", true);
+                        mod = mod.__call__();
+                        pucnHash = (ucnhashAPI)mod.__tojava__(Object.class);
+                        if(pucnHash.getCchMax() < 0)
+                            throw Py.UnicodeError("Unicode names not loaded");
                     }
-                    if (endBrace != end && str.charAt(endBrace) == '}') {
-                         int value = pucnHash.getValue(str, startName,
-                                                       endBrace);
-                         if (value < 0) {
-                             codecs.decoding_error("unicode escape", v,
-                                  errors, "Invalid Unicode Character Name");
-                             v.append('\\');
-                             v.append(str.charAt(s-1));
-                             break;
-                         }
-
-                         if (value < 1<<16) {
-                             /* In UCS-2 range, easy solution.. */
-                             v.append((char) value);
-                         } else {
-                             /* Oops, its in UCS-4 space, */
-                             /*  compute and append the two surrogates: */
-                             /*  translate from 10000..10FFFF to 0..FFFFF */
-                             value -= 0x10000;
-
-                             /* high surrogate = top 10 bits added to D800 */
-                             v.append((char) (0xD800 + (value >> 10)));
-
-                             /* low surrogate = bottom 10 bits added to DC00*/
-                             v.append((char) (0xDC00 + (value & ~0xFC00)));
+                    if(str.charAt(s) == '{') {
+                        int startName = s + 1;
+                        int endBrace = startName;
+                        /*
+                         * look for either the closing brace, or we exceed the
+                         * maximum length of the unicode character names
+                         */
+                        int maxLen = pucnHash.getCchMax();
+                        while(endBrace < end && str.charAt(endBrace) != '}'
+                                && (endBrace - startName) <= maxLen) {
+                            endBrace++;
                         }
-                        s = endBrace + 1;
+                        if(endBrace != end && str.charAt(endBrace) == '}') {
+                            int value = pucnHash.getValue(str,
+                                                          startName,
+                                                          endBrace);
+                            if(storeUnicodeCharacter(value, v)) {
+                                s = endBrace + 1;
+                            } else {
+                                s = codecs.insertReplacementAndGetResume(v,
+                                                                         errors,
+                                                                         "unicodeescape",
+                                                                         str,
+                                                                         loopStart,
+                                                                         endBrace + 1,
+                                                                         "illegal Unicode character");
+                            }
+                        } else {
+                            s = codecs.insertReplacementAndGetResume(v,
+                                                                     errors,
+                                                                     "unicodeescape",
+                                                                     str,
+                                                                     loopStart,
+                                                                     endBrace,
+                                                                     "malformed \\N character escape");
+                        }
+                        break;
                     } else {
-                         codecs.decoding_error("unicode escape", v, errors,
-                              "Unicode name missing closing brace");
-                         v.append('\\');
-                         v.append(str.charAt(s-1));
-                         break;
+                        s = codecs.insertReplacementAndGetResume(v,
+                                                                 errors,
+                                                                 "unicodeescape",
+                                                                 str,
+                                                                 loopStart,
+                                                                 s + 1,
+                                                                 "malformed \\N character escape");
                     }
                     break;
-                }
-                codecs.decoding_error("unicode escape", v, errors,
-                                      "Missing opening brace for Unicode " +
-                                      "Character Name escape");
+                default:
+                    v.append('\\');
+                    v.append(str.charAt(s - 1));
+                    break;
+            }
+        }
+        return v.toString();
+    }
 
-                /* fall through on purpose */
-           default:
-               v.append('\\');
-               v.append(str.charAt(s-1));
-               break;
-           }
-       }
-       return v.toString();
+    /**
+     * Decodes the fixed-width hex escape whose digits start at hexDigitStart
+     * (the two preceding chars are the backslash and the escape letter) and
+     * appends the resulting character to partialDecode.
+     *
+     * Truncated input, a non-hex digit, or a value storeUnicodeCharacter
+     * rejects is reported through codecs.insertReplacementAndGetResume.
+     *
+     * @param digits number of hex digits the escape requires
+     * @param size exclusive end index of the input within str
+     * @param errorMessage reason reported for truncated or non-hex input
+     * @return index in str at which the caller should resume decoding
+     */
+    private static int hexescape(StringBuffer partialDecode,
+                                 String errors,
+                                 int digits,
+                                 int hexDigitStart,
+                                 String str,
+                                 int size,
+                                 String errorMessage) {
+        int escapeStart = hexDigitStart - 2;
+        if(hexDigitStart + digits > size) {
+            return codecs.insertReplacementAndGetResume(partialDecode, errors,
+                    "unicodeescape", str, escapeStart, size, errorMessage);
+        }
+        int x = 0;
+        for(int i = 0; i < digits; ++i) {
+            int d = Character.digit(str.charAt(hexDigitStart + i), 16);
+            if(d == -1) {
+                /* the reported range includes the offending character */
+                return codecs.insertReplacementAndGetResume(partialDecode,
+                        errors, "unicodeescape", str, escapeStart,
+                        hexDigitStart + i + 1, errorMessage);
+            }
+            /* eight F digits overflow to a negative int, rejected below */
+            x = (x << 4) + d;
+        }
+        int escapeEnd = hexDigitStart + digits;
+        if(storeUnicodeCharacter(x, partialDecode)) {
+            return escapeEnd;
+        }
+        /*
+         * The escape is complete, so the error range ends right after its
+         * last digit; ending one further would swallow the character that
+         * follows the escape under "ignore"/"replace".
+         */
+        return codecs.insertReplacementAndGetResume(partialDecode, errors,
+                "unicodeescape", str, escapeStart, escapeEnd,
+                "illegal Unicode character");
     }
 
+    /**
+     * Appends the code point value to partialDecode: one char when it is
+     * below 0x10000, a high/low surrogate pair when it is 0x10000..0x10FFFF.
+     * value is an int because it may lie outside the 16-bit char range.
+     * @return false, with nothing appended, if value is negative or above
+     *         0x10FFFF; true otherwise
+     */
+    private static boolean storeUnicodeCharacter(int value,
+                                                 StringBuffer partialDecode) {
+        if(value < 0 || value > 0x10ffff) {
+            return false;
+        }
+        if(value < 0x10000) {
+            partialDecode.append((char)value);
+            return true;
+        }
+        /* map 10000..10FFFF onto 0..FFFFF: top 10 bits + D800, low 10 + DC00 */
+        int offset = value - 0x10000;
+        partialDecode.append((char)(0xD800 + (offset >> 10)));
+        partialDecode.append((char)(0xDC00 + (offset & 0x3FF)));
+        return true;
+    }
+
     public boolean equals(Object other) {
         if (!(other instanceof PyString))
             return false;
@@ -3306,7 +3414,11 @@
         for (int i=0; i<count; i++) {
             string.getChars(0, s, new_chars, i*s);
         }
-        return new PyString(new String(new_chars));
+        if (this instanceof PyUnicode) {
+            return new PyUnicode(new String(new_chars));
+        } else {
+            return new PyString(new String(new_chars));
+        }
     }
 
     final PyObject str___mul__(PyObject o) {
@@ -4871,6 +4983,11 @@
     }
 
     public String formatLong(PyString arg, char type, boolean altFlag) {
+        if(precision > 250){
+        // A magic number. Larger than in CPython.
+        throw Py.OverflowError(
+             "formatted long is too long (precision too long?)");
+        }
         String s = arg.toString();
         int end = s.length();
         int ptr = 0;
@@ -4928,6 +5045,11 @@
     }
 
     public String formatInteger(long v, int radix, boolean unsigned) {
+        if(precision > 250){
+            // A magic number. Larger than in CPython.
+            throw Py.OverflowError(
+                 "formatted integer is too long (precision too long?)");
+        }
         if (unsigned) {
             if (v < 0)
                 v = 0x100000000l + v;
@@ -4949,6 +5071,11 @@
     }
 
     public String formatFloatDecimal(double v, boolean truncate) {
+        if(precision > 250) {
+            // A magic number. Larger than in CPython.
+            throw Py.OverflowError(
+                 "formatted float is too long (precision too long?)");
+        }
         java.text.NumberFormat format = java.text.NumberFormat.getInstance(
                                            java.util.Locale.US);
         int prec = precision;
@@ -5087,11 +5214,6 @@
                 precision = getNumber();
                 if (precision < -1)
                     precision = 0;
-                if (precision > 250) {
-                    // A magic number. Larger than in CPython.
-                    throw Py.OverflowError(
-                         "formatted float is too long (precision too long?)");
-                }
 
                 c = pop();
             }
@@ -5111,7 +5233,6 @@
                 fill = '0';
             else
                 fill = ' ';
-
             switch(c) {
             case 's':
             case 'r':
Index: src/org/python/core/exceptions.java
===================================================================
--- src/org/python/core/exceptions.java	(revision 2833)
+++ src/org/python/core/exceptions.java	(working copy)
@@ -64,6 +64,10 @@
             + " |    +-- ValueError\n"
             + " |    |    |\n"
             + " |    |    +-- UnicodeError\n"
+            + " |    |        |\n"
+            + " |    |        +-- UnicodeEncodeError\n"
+            + " |    |        +-- UnicodeDecodeError\n"
+            + " |    |        +-- UnicodeTranslateError\n"
             + " |    |\n"
             + " |    +-- ReferenceError\n"
             + " |    +-- SystemError\n"
@@ -158,6 +162,15 @@
         buildClass(dict, "UnicodeError", "ValueError", "empty__init__",
                 "Unicode related error.");
 
+        buildClass(dict, "UnicodeEncodeError", "UnicodeError", "UnicodeEncodeError",
+                "Unicode encoding error.");
+
+        buildClass(dict, "UnicodeDecodeError", "UnicodeError", "UnicodeDecodeError",
+                "Unicode decoding error.");
+
+        buildClass(dict, "UnicodeTranslateError", "UnicodeError", "UnicodeTranslateError",
+                "Unicode translation error.");
+
         buildClass(dict, "KeyboardInterrupt", "StandardError", "empty__init__",
                 "Program interrupted by user.");
 
@@ -213,6 +226,9 @@
 
         buildClass(dict, "DeprecationWarning", "Warning", "empty__init__",
                 "Base class for warnings about deprecated features.");
+        
+        buildClass(dict, "PendingDeprecationWarning", "Warning", "empty__init__",
+                "Base class for warnings about features which will be deprecated in the future.");
 
         buildClass(dict, "SyntaxWarning", "Warning", "empty__init__",
                 "Base class for warnings about dubious syntax.");
@@ -222,6 +238,9 @@
 
         buildClass(dict, "OverflowWarning", "Warning", "empty__init__",
                 "Base class for warnings about numeric overflow.");
+        
+        buildClass(dict, "FutureWarning", "Warning", "empty__init__",
+                "Base class for warnings about constructs that will change semantically in the future.");
 
         ts.frame = ts.frame.f_back;
     }
@@ -308,7 +327,7 @@
     }
 
     public static PyString SyntaxError__str__(PyObject[] arg, String[] kws) {
-        ArgParser ap = new ArgParser("__init__", arg, kws, "self", "args");
+        ArgParser ap = new ArgParser("__str__", arg, kws, "self", "args");
         PyObject self = ap.getPyObject(0);
         PyString str = self.__getattr__("msg").__str__();
         PyObject filename = basename(self.__findattr__("filename"));
@@ -376,7 +395,7 @@
     }
 
     public static PyString EnvironmentError__str__(PyObject[] arg, String[] kws) {
-        ArgParser ap = new ArgParser("__init__", arg, kws, "self");
+        ArgParser ap = new ArgParser("__str__", arg, kws, "self");
         PyObject self = ap.getPyObject(0);
 
         if (self.__getattr__("filename") != Py.None) {
@@ -415,6 +434,176 @@
         }
     }
 
+    public static PyObject UnicodeError(PyObject[] arg, String[] kws) {
+        PyObject dict = empty__init__(arg, kws);
+        dict.__setitem__("__init__", getJavaFunc("UnicodeError__init__"));
+        return dict;
+    }
+    
+    /* Shared __init__ for the five-argument Unicode errors: checks the argument
+     * types (objectType is the type required of 'object') and stores them on self. */
+    public static void UnicodeError__init__(PyObject[] arg, String[] kws, PyObject objectType) {
+        ArgParser ap = new ArgParser("__init__", arg, kws, "self", "args");
+        PyObject self = ap.getPyObject(0);
+        PyObject args = ap.getList(1);
+        self.__setattr__("args", args);
+        if(args.__len__() != 5) {
+            throw Py.TypeError("function takes exactly 5 arguments (" + args.__len__() + " given)");
+        }
+        if(!isPyString(args.__getitem__(0)) || !Py.isInstance(args.__getitem__(1), objectType)
+                || !isPyInt(args.__getitem__(2)) || !isPyInt(args.__getitem__(3))
+                || !isPyString(args.__getitem__(4))) {
+            throw Py.TypeError("expected arguments (encoding: str, object, start: int, end: int, reason: str)");
+        }
+        self.__setattr__("encoding", args.__getitem__(0));
+        self.__setattr__("object", args.__getitem__(1));
+        self.__setattr__("start", args.__getitem__(2));
+        self.__setattr__("end", args.__getitem__(3));
+        self.__setattr__("reason", args.__getitem__(4));
+    }
+
+    private static boolean isPyInt(PyObject object) {
+        return Py.isInstance(object, PyType.fromClass(PyInteger.class));
+     }
+
+    private static boolean isPyString(PyObject item) {
+        return Py.isInstance(item, PyType.fromClass(PyString.class));
+    }
+    
+    public static void UnicodeDecodeError__init__(PyObject[] arg, String[] kws) {
+        UnicodeError__init__(arg, kws, PyType.fromClass(PyString.class));
+    }
+
+    public static PyString UnicodeDecodeError__str__(PyObject[] arg, String[] kws) { // builds the message text for str(exc)
+        ArgParser ap = new ArgParser("__str__", arg, kws, "self");
+        PyObject self = ap.getPyObject(0);
+        int start = ((PyInteger)self.__getattr__("start")).getValue(); // the cast requires 'start' to still be a PyInteger
+        int end = ((PyInteger)self.__getattr__("end")).getValue(); // likewise for 'end'
+
+        if(end == (start + 1)) { // a single offending position: report its value in hex
+            PyInteger badByte = new PyInteger((int)(self.__getattr__("object")
+                    .toString().charAt(start)) & 0xff); // only the low 8 bits are reported
+            return Py.newString("'%.400s' codec can't decode byte 0x%02x in position %d: %.400s")
+                    .__mod__(new PyTuple(new PyObject[] {self.__getattr__("encoding"),
+                                                         badByte,
+                                                         self.__getattr__("start"),
+                                                         self.__getattr__("reason")}))
+                    .__str__();
+        } else { // any other span: report the inclusive range start..end-1
+            return Py.newString("'%.400s' codec can't decode bytes in position %d-%d: %.400s")
+                    .__mod__(new PyTuple(new PyObject[] {self.__getattr__("encoding"),
+                                                         self.__getattr__("start"),
+                                                         new PyInteger(end - 1),
+                                                         self.__getattr__("reason")}))
+                    .__str__();
+        } 
+    }
+
+    public static PyObject UnicodeDecodeError(PyObject[] arg, String[] kws) {
+        PyObject dict = empty__init__(arg, kws);
+        dict.__setitem__("__init__", getJavaFunc("UnicodeDecodeError__init__"));
+        dict.__setitem__("__str__", getJavaFunc("UnicodeDecodeError__str__"));
+        return dict;
+    }
+    
+    public static void UnicodeEncodeError__init__(PyObject[] arg, String[] kws) {
+        UnicodeError__init__(arg, kws, PyType.fromClass(PyBaseString.class));
+    }
+
+    public static PyString UnicodeEncodeError__str__(PyObject[] arg, String[] kws) { // builds the message text for str(exc)
+        ArgParser ap = new ArgParser("__str__", arg, kws, "self");
+        PyObject self = ap.getPyObject(0);
+        int start = ((PyInteger)self.__getattr__("start")).getValue(); // the cast requires 'start' to still be a PyInteger
+        int end = ((PyInteger)self.__getattr__("end")).getValue(); // likewise for 'end'
+
+        if(end == (start + 1)) { // a single offending character: show it as an escape
+            int badchar = (int)(self.__getattr__("object").toString().charAt(start));
+            String format; // escape width is chosen from the character's magnitude
+            if(badchar <= 0xff)
+                format = "'%.400s' codec can't encode character u'\\x%02x' in position %d: %.400s";
+            else if(badchar <= 0xffff)
+                format = "'%.400s' codec can't encode character u'\\u%04x' in position %d: %.400s";
+            else // NOTE(review): charAt() returns a 16-bit char, so this branch looks unreachable - confirm
+                format = "'%.400s' codec can't encode character u'\\U%08x' in position %d: %.400s";
+            return Py.newString(format)
+                    .__mod__(new PyTuple(new PyObject[] {self.__getattr__("encoding"),
+                                                         new PyInteger(badchar),
+                                                         self.__getattr__("start"),
+                                                         self.__getattr__("reason")}))
+                    .__str__();
+        } else { // any other span: report the inclusive range start..end-1
+            return Py.newString("'%.400s' codec can't encode characters in position %d-%d: %.400s")
+                    .__mod__(new PyTuple(new PyObject[] {self.__getattr__("encoding"),
+                                                         self.__getattr__("start"),
+                                                         new PyInteger(end - 1),
+                                                         self.__getattr__("reason")}))
+                    .__str__();
+        } 
+    }
+
+    public static PyObject UnicodeEncodeError(PyObject[] arg, String[] kws) {
+        PyObject dict = empty__init__(arg, kws);
+        dict.__setitem__("__init__", getJavaFunc("UnicodeEncodeError__init__"));
+        dict.__setitem__("__str__", getJavaFunc("UnicodeEncodeError__str__"));
+        return dict;
+    }
+    
+    /* __init__ for UnicodeTranslateError: validates (object, start, end, reason) and stores them on self. */
+    public static void UnicodeTranslateError__init__(PyObject[] arg, String[] kws) {
+        ArgParser ap = new ArgParser("__init__", arg, kws, "self", "args");
+        PyObject self = ap.getPyObject(0);
+        PyObject args = ap.getList(1);
+        if(args.__len__() != 4) {
+            throw Py.TypeError("function takes exactly 4 arguments (" + args.__len__() + " given)");
+        }
+        if(!Py.isInstance(args.__getitem__(0), PyType.fromClass(PyBaseString.class))
+                || !isPyInt(args.__getitem__(1)) || !isPyInt(args.__getitem__(2))
+                || !isPyString(args.__getitem__(3))) {
+            throw Py.TypeError("expected arguments (object: string, start: int, end: int, reason: str)");
+        }
+        self.__setattr__("args", args);
+        self.__setattr__("object", args.__getitem__(0));
+        self.__setattr__("start", args.__getitem__(1));
+        self.__setattr__("end", args.__getitem__(2));
+        self.__setattr__("reason", args.__getitem__(3));
+    }
+
+    public static PyString UnicodeTranslateError__str__(PyObject[] arg, String[] kws) { // builds the message text for str(exc)
+        ArgParser ap = new ArgParser("__str__", arg, kws, "self");
+        PyObject self = ap.getPyObject(0);
+        int start = ((PyInteger)self.__getattr__("start")).getValue(); // the cast requires 'start' to still be a PyInteger
+        int end = ((PyInteger)self.__getattr__("end")).getValue(); // likewise for 'end'
+
+        if(end == (start + 1)) { // a single offending character: show it as an escape
+            int badchar = (int)(self.__getattr__("object").toString().charAt(start));
+            String format; // escape width is chosen from the character's magnitude
+            if(badchar <= 0xff)
+                format = "can't translate character u'\\x%02x' in position %d: %.400s";
+            else if(badchar <= 0xffff)
+                format = "can't translate character u'\\u%04x' in position %d: %.400s";
+            else // NOTE(review): charAt() returns a 16-bit char, so this branch looks unreachable - confirm
+                format = "can't translate character u'\\U%08x' in position %d: %.400s";
+            return Py.newString(format)
+                    .__mod__(new PyTuple(new PyObject[] {new PyInteger(badchar),
+                                                         self.__getattr__("start"),
+                                                         self.__getattr__("reason")}))
+                    .__str__();
+        } else { // any other span: report the inclusive range start..end-1
+            return Py.newString("can't translate characters in position %d-%d: %.400s")
+                    .__mod__(new PyTuple(new PyObject[] {self.__getattr__("start"),
+                                                         new PyInteger(end - 1),
+                                                         self.__getattr__("reason")}))
+                    .__str__();
+        } 
+    }
+
+    public static PyObject UnicodeTranslateError(PyObject[] arg, String[] kws) {
+        PyObject dict = empty__init__(arg, kws);
+        dict.__setitem__("__init__", getJavaFunc("UnicodeTranslateError__init__"));
+        dict.__setitem__("__str__", getJavaFunc("UnicodeTranslateError__str__"));
+        return dict;
+    }
+
     private static PyObject getJavaFunc(String name) {
         return Py.newJavaFunc(exceptions.class, name);
     }
Index: src/org/python/core/Py.java
===================================================================
--- src/org/python/core/Py.java	(revision 2833)
+++ src/org/python/core/Py.java	(working copy)
@@ -229,6 +229,49 @@
         return new PyException(Py.UnicodeError, message);
     }
 
+    public static PyObject UnicodeTranslateError;
+
+    /* Builds a UnicodeTranslateError whose args are (object, start, end, reason). */
+    public static PyException UnicodeTranslateError(String object,
+                                                    int start,
+                                                    int end,
+                                                    String reason) {
+        // The text being translated is unicode, so expose it as PyUnicode, not a byte string.
+        PyObject[] args = {new PyUnicode(object), new PyInteger(start),
+                           new PyInteger(end), new PyString(reason)};
+        return new PyException(Py.UnicodeTranslateError, new PyTuple(args));
+    }
+
+    public static PyObject UnicodeDecodeError;
+
+    /* Builds a UnicodeDecodeError whose args are (encoding, object, start, end, reason). */
+    public static PyException UnicodeDecodeError(String encoding,
+                                                 String object,
+                                                 int start,
+                                                 int end,
+                                                 String reason) {
+        // Collect the five constructor arguments first, then wrap them in a tuple.
+        PyObject[] args = {new PyString(encoding), new PyString(object),
+                           new PyInteger(start), new PyInteger(end),
+                           new PyString(reason)};
+        return new PyException(Py.UnicodeDecodeError, new PyTuple(args));
+    }
+
+    public static PyObject UnicodeEncodeError;
+
+    /* Builds a UnicodeEncodeError whose args are (encoding, object, start, end, reason). */
+    public static PyException UnicodeEncodeError(String encoding,
+                                                 String object,
+                                                 int start,
+                                                 int end,
+                                                 String reason) {
+        // The text that failed to encode is exposed as unicode (PyUnicode), not a byte string.
+        PyObject[] args = {new PyString(encoding), new PyUnicode(object),
+                           new PyInteger(start), new PyInteger(end),
+                           new PyString(reason)};
+        return new PyException(Py.UnicodeEncodeError, new PyTuple(args));
+    }
+
     public static PyObject EOFError;
     public static PyException EOFError(String message) {
         return new PyException(Py.EOFError, message);
@@ -274,6 +317,11 @@
     public static void DeprecationWarning(String message) {
         warning(DeprecationWarning, message);
     }
+    
+    public static PyObject PendingDeprecationWarning;
+    public static void PendingDeprecationWarning(String message) {
+        warning( PendingDeprecationWarning, message);
+    }
 
     public static PyObject SyntaxWarning;
     public static void SyntaxWarning(String message) {
@@ -289,6 +337,11 @@
     public static void RuntimeWarning(String message) {
         warning(RuntimeWarning, message);
     }
+    
+    public static PyObject FutureWarning;
+    public static void FutureWarning(String message) {
+        warning(FutureWarning, message);
+    }
 
     private static PyObject warnings_mod;
     private static PyObject importWarnings() {
@@ -611,15 +664,20 @@
         FloatingPointError  = initExc("FloatingPointError", exc, dict);
         ValueError          = initExc("ValueError", exc, dict);
         UnicodeError        = initExc("UnicodeError", exc, dict);
+        UnicodeEncodeError  = initExc("UnicodeEncodeError", exc, dict);
+        UnicodeDecodeError  = initExc("UnicodeDecodeError", exc, dict);
+        UnicodeTranslateError  = initExc("UnicodeTranslateError", exc, dict);
         ReferenceError      = initExc("ReferenceError", exc, dict);
         SystemError         = initExc("SystemError", exc, dict);
         MemoryError         = initExc("MemoryError", exc, dict);
         Warning             = initExc("Warning", exc, dict);
         UserWarning         = initExc("UserWarning", exc, dict);
         DeprecationWarning  = initExc("DeprecationWarning", exc, dict);
+        PendingDeprecationWarning      = initExc("PendingDeprecationWarning", exc, dict);
         SyntaxWarning       = initExc("SyntaxWarning", exc, dict);
         OverflowWarning     = initExc("OverflowWarning", exc, dict);
         RuntimeWarning      = initExc("RuntimeWarning", exc, dict);
+        FutureWarning      = initExc("FutureWarning", exc, dict);        
     }
 
     public static PySystemState defaultSystemState;
Index: src/org/python/modules/_codecs.java
===================================================================
--- src/org/python/modules/_codecs.java	(revision 2833)
+++ src/org/python/modules/_codecs.java	(working copy)
@@ -10,9 +10,12 @@
 
 import org.python.core.Py;
 import org.python.core.PyInteger;
+import org.python.core.PyNone;
 import org.python.core.PyObject;
 import org.python.core.PyString;
+import org.python.core.PySystemState;
 import org.python.core.PyTuple;
+import org.python.core.PyUnicode;
 import org.python.core.codecs;
 
 public class _codecs {
@@ -26,10 +29,22 @@
         return codecs.lookup(encoding);
     }
 
+    public static PyObject lookup_error(String handlerName) {
+        return codecs.lookup_error(handlerName);
+    }
 
+    public static void register_error(String name, PyObject errorHandler) {
+        codecs.register_error(name, errorHandler);
+    }
 
+    /* Packs a decoder result as the 2-tuple (unicode text, length). */
+    private static PyTuple decode_tuple(String s, int len) {
+        PyObject decoded = new PyUnicode(s);
+        PyObject consumed = Py.newInteger(len);
+        return new PyTuple(new PyObject[] {decoded, consumed});
+    }
 
-    private static PyTuple codec_tuple(String s, int len) {
+    private static PyTuple encode_tuple(String s, int len) {
         return new PyTuple(new PyObject[] {
             Py.java2py(s),
             Py.newInteger(len)
@@ -45,7 +60,7 @@
 
     public static PyTuple utf_8_decode(String str, String errors) {
         int size = str.length();
-        return codec_tuple(codecs.PyUnicode_DecodeUTF8(str, errors), size);
+        return decode_tuple(codecs.PyUnicode_DecodeUTF8(str, errors), size);
     }
 
 
@@ -55,55 +70,115 @@
 
     public static PyTuple utf_8_encode(String str, String errors) {
         int size = str.length();
-        return codec_tuple(codecs.PyUnicode_EncodeUTF8(str, errors), size);
+        return encode_tuple(codecs.PyUnicode_EncodeUTF8(str, errors), size);
     }
 
 
+    /* --- UTF-7 Codec --------------------------------------------------- */
 
+    public static PyTuple utf_7_decode(String str) {
+        return utf_7_decode(str, null);
+    }
+
+    public static PyTuple utf_7_decode(String str, String errors) {
+        int size = str.length();
+        return decode_tuple(codecs.PyUnicode_DecodeUTF7(str, errors), size);
+    }
+
+
+    public static PyTuple utf_7_encode(String str) {
+        return utf_7_encode(str, null);
+    }
+
+    public static PyTuple utf_7_encode(String str, String errors) {
+        int size = str.length();
+        return encode_tuple(codecs.PyUnicode_EncodeUTF7(str, false, false, errors), size);
+    }
+    
+    public static PyTuple escape_decode(String str){
+        return escape_decode(str, null);
+    }
+
+    public static PyTuple escape_decode(String str, String errors) {
+        return decode_tuple(PyString.decode_UnicodeEscape(str,
+                                                          0,
+                                                          str.length(),
+                                                          errors,
+                                                          true), str.length());
+    }
+    
+    public static PyTuple escape_encode(String str){
+        return escape_encode(str, null);
+    }   
+    
+    public static PyTuple escape_encode(String str, String errors) {
+        return encode_tuple(PyString.encode_UnicodeEscape(str, false),
+                                                                                str.length());
+       
+    }
+
     /* --- Character Mapping Codec --------------------------------------- */
 
-    public static PyTuple charmap_decode(String str, String errors,
+    public static PyTuple charmap_decode(String str,
+                                         String errors,
                                          PyObject mapping) {
+        return charmap_decode(str, errors, mapping, false);
+    }
+
+    public static PyTuple charmap_decode(String str,
+                                         String errors,
+                                         PyObject mapping, boolean ignoreUnmapped) {
+        /* Decode str through mapping. A char with no mapping entry is copied
+         * as-is when ignoreUnmapped is set, else handed to the errors policy. */
         int size = str.length();
         StringBuffer v = new StringBuffer(size);
-
-        for (int i = 0; i < size; i++) {
+        for(int i = 0; i < size; i++) {
             char ch = str.charAt(i);
-            if (ch > 0xFF) {
-                codecs.decoding_error("charmap", v, errors,
-                                      "ordinal not in range(255)");
-                i++;
+            if(ch > 0xFF) {
+                i = codecs.insertReplacementAndGetResume(v,
+                                                         errors,
+                                                         "charmap",
+                                                         str,
+                                                         i,
+                                                         i + 1,
+                                                         "ordinal not in range(256)") - 1;
                 continue;
             }
-
             PyObject w = Py.newInteger(ch);
             PyObject x = mapping.__finditem__(w);
-            if (x == null) {
-                /* No mapping found: default to Latin-1 mapping if possible */
-                v.append(ch);
+            if(x == null) {
+                if(ignoreUnmapped){
+                    v.append(ch);
+                }else{
+                    i = codecs.insertReplacementAndGetResume(v, errors, "charmap", str, i, i + 1, "no mapping found") - 1;
+                }
                 continue;
             }
-
             /* Apply mapping */
-            if (x instanceof PyInteger) {
-                int value = ((PyInteger) x).getValue();
-                if (value < 0 || value > 65535)
-                    throw Py.TypeError(
-                             "character mapping must be in range(65535)");
-                v.append((char) value);
-            } else if (x == Py.None) {
-                codecs.decoding_error("charmap", v,  errors,
-                                      "character maps to <undefined>");
-            } else if (x instanceof PyString) {
+            if(x instanceof PyInteger) {
+                int value = ((PyInteger)x).getValue();
+                if(value < 0 || value > PySystemState.maxunicode) {
+                    throw Py.TypeError("character mapping must return "
+                            + "integer greater than 0 and less than sys.maxunicode");
+                }
+                v.append((char)value);
+            } else if(x == Py.None) {
+                i = codecs.insertReplacementAndGetResume(v,
+                                                         errors,
+                                                         "charmap",
+                                                         str,
+                                                         i,
+                                                         i + 1,
+                                                         "character maps to <undefined>") - 1;
+            } else if(x instanceof PyString) {
                 v.append(x.toString());
-            }
-            else {
+            } else {
                 /* wrong return value */
-                throw Py.TypeError("character mapping must return integer, " +
-                                   "None or unicode");
+                throw Py.TypeError("character mapping must return "
+                        + "integer, None or str");
             }
         }
-        return codec_tuple(v.toString(), size);
+        return decode_tuple(v.toString(), size);
     }
 
 
@@ -112,54 +187,93 @@
 
     public static PyTuple charmap_encode(String str, String errors,
                                          PyObject mapping) {
+        //Default to Latin-1
+        if(mapping == null){
+            return latin_1_encode(str, errors);
+        }
+        return charmap_encode_internal(str, errors, mapping, new StringBuffer(str.length()), true);
+    }
+    
+    private static PyTuple charmap_encode_internal(String str,
+                                                   String errors,
+                                                   PyObject mapping,
+                                                   StringBuffer v,
+                                                   boolean letLookupHandleError) {
         int size = str.length();
-        StringBuffer v = new StringBuffer(size);
-
-        for (int i = 0; i < size; i++) {
+        for(int i = 0; i < size; i++) {
             char ch = str.charAt(i);
             PyObject w = Py.newInteger(ch);
             PyObject x = mapping.__finditem__(w);
-            if (x == null) {
-                /* No mapping found: default to Latin-1 mapping if possible */
-                if (ch < 256)
-                    v.append(ch);
-                else
-                    codecs.encoding_error("charmap", v, errors,
-                                          "missing character mapping");
-                continue;
-            }
-            if (x instanceof PyInteger) {
-                int value = ((PyInteger) x).getValue();
-                if (value < 0 || value > 255)
-                    throw Py.TypeError(
-                            "character mapping must be in range(256)");
-                v.append((char) value);
-            } else if (x == Py.None) {
-                codecs.encoding_error("charmap", v,  errors,
-                                      "character maps to <undefined>");
-            } else if (x instanceof PyString) {
+            if(x == null || x == Py.None) {
+                /* No usable mapping for ch. Replacement text supplied by an
+                 * error handler is encoded with letLookupHandleError == false
+                 * and must fail here instead of re-entering the handler. */
+                if(!letLookupHandleError) {
+                    throw Py.UnicodeEncodeError("charmap",
+                                                str,
+                                                i,
+                                                i + 1,
+                                                "character maps to <undefined>");
+                }
+                i = handleBadMapping(str, errors, mapping, v, size, i);
+            } else if(x instanceof PyInteger) {
+                int value = ((PyInteger)x).getValue();
+                if(value < 0 || value > 255) {
+                    throw Py.TypeError("character mapping must be in range(256)");
+                }
+                v.append((char)value);
+            } else if(x instanceof PyString && !(x instanceof PyUnicode)) {
                 v.append(x.toString());
-            }
-            else {
+            } else {
                 /* wrong return value */
-                throw Py.TypeError("character mapping must return " +
-                                   "integer, None or unicode");
+                throw Py.TypeError("character mapping must return "
+                        + "integer, None or str");
             }
         }
-        return codec_tuple(v.toString(), size);
+        return encode_tuple(v.toString(), size);
     }
 
+    private static int handleBadMapping(String str, // applies the 'errors' policy to the unencodable char at str[i]
+                                        String errors,
+                                        PyObject mapping,
+                                        StringBuffer v,
+                                        int size,
+                                        int i) { // the return value is assigned back to the caller's loop index
+        if(errors != null) {
+            if(errors.equals(codecs.IGNORE)) {
+                return i; // nothing is appended for this char
+            } else if(errors.equals(codecs.REPLACE)) {
+                charmap_encode_internal("?", errors, mapping, v, false); // "?" is itself encoded through the mapping
+                return i;
+            } else if(errors.equals(codecs.XMLCHARREFREPLACE)) {
+                charmap_encode_internal(codecs.xmlcharrefreplace(i, i + 1, str)
+                        .toString(), errors, mapping, v, false); // replacement text comes from codecs.xmlcharrefreplace
+                return i;
+            } else if(errors.equals(codecs.BACKSLASHREPLACE)) {
+                charmap_encode_internal(codecs.backslashreplace(i, i + 1, str)
+                        .toString(), errors, mapping, v, false); // replacement text comes from codecs.backslashreplace
+                return i;
+            }
+        }
+        PyObject replacement = codecs.encoding_error(errors, // errors == null or any other name ends up here
+                                                     "charmap",
+                                                     str,
+                                                     i,
+                                                     i + 1,
+                                                     "character maps to <undefined>");
+        String replStr = replacement.__getitem__(0).toString(); // item 0 of the result is used as the replacement text
+        charmap_encode_internal(replStr, errors, mapping, v, false);
+        return codecs.calcNewPosition(size, replacement) - 1; // presumably -1 offsets the i++ of the calling loop
+    }
 
 
-    /* --- 7-bit ASCII Codec -------------------------------------------- */
-
     public static PyTuple ascii_decode(String str) {
         return ascii_decode(str, null);
     }
 
     public static PyTuple ascii_decode(String str, String errors) {
         int size = str.length();
-        return codec_tuple(codecs.PyUnicode_DecodeASCII(str, size, errors),
+        return decode_tuple(codecs.PyUnicode_DecodeASCII(str, size, errors),
                                                                         size);
     }
 
@@ -170,7 +284,7 @@
 
     public static PyTuple ascii_encode(String str, String errors) {
         int size = str.length();
-        return codec_tuple(codecs.PyUnicode_EncodeASCII(str, size, errors),
+        return encode_tuple(codecs.PyUnicode_EncodeASCII(str, size, errors), // second tuple argument is the input length, not the encoded length
                                                                         size);
     }
 
@@ -183,21 +297,8 @@
 
     public static PyTuple latin_1_decode(String str, String errors) {
         int size = str.length();
-        StringBuffer v = new StringBuffer(size);
-
-        for (int i = 0; i < size; i++) {
-            char ch = str.charAt(i);
-            if (ch < 256) {
-                v.append(ch);
-            } else {
-                codecs.decoding_error("latin-1", v, errors,
-                                      "ordinal not in range(256)");
-                i++;
-                continue;
-            }
-        }
-
-        return codec_tuple(v.toString(), size);
+        return decode_tuple(codecs.PyUnicode_DecodeLatin1(str, size, errors), // all decoding and error handling is delegated to the codecs helper
+                                                                        size); // size is the input length computed above
     }
 
 
@@ -207,17 +308,7 @@
 
     public static PyTuple latin_1_encode(String str, String errors) {
         int size = str.length();
-        StringBuffer v = new StringBuffer(size);
-
-        for (int i = 0; i < size; i++) {
-            char ch = str.charAt(i);
-            if (ch >= 256) {
-                codecs.encoding_error("latin-1", v, errors,
-                                      "ordinal not in range(256)");
-            } else
-                v.append(ch);
-        }
-        return codec_tuple(v.toString(), size);
+        return encode_tuple(codecs.PyUnicode_EncodeLatin1(str, size, errors), size); // encoding is delegated; size is the input length, not the encoded length
     }
 
 
@@ -229,12 +320,12 @@
     }
 
     public static PyTuple utf_16_encode(String str, String errors) {
-        return codec_tuple(encode_UTF16(str, errors, 0), str.length());
+        return encode_tuple(encode_UTF16(str, errors, 0), str.length()); // 0: this overload takes no byteorder, so none is requested
     }
 
     public static PyTuple utf_16_encode(String str, String errors,
                                        int byteorder) {
-        return codec_tuple(encode_UTF16(str, errors, byteorder),
+        return encode_tuple(encode_UTF16(str, errors, byteorder), // caller's byteorder is passed through unchanged
                            str.length());
     }
 
@@ -243,7 +334,7 @@
     }
 
     public static PyTuple utf_16_le_encode(String str, String errors) {
-        return codec_tuple(encode_UTF16(str, errors, -1), str.length());
+        return encode_tuple(encode_UTF16(str, errors, -1), str.length()); // -1: byte-order value used by the "le" variants in this class
     }
 
     public static PyTuple utf_16_be_encode(String str) {
@@ -251,7 +342,7 @@
     }
 
     public static PyTuple utf_16_be_encode(String str, String errors) {
-        return codec_tuple(encode_UTF16(str, errors, 1), str.length());
+        return encode_tuple(encode_UTF16(str, errors, 1), str.length()); // 1: byte-order value used by the "be" variants in this class
     }
 
 
@@ -291,14 +382,13 @@
     }
 
     public static PyTuple utf_16_decode(String str, String errors) {
-        int[] bo = new int[] { 0 };
-        return codec_tuple(decode_UTF16(str, errors, bo), str.length());
+        return utf_16_decode(str, errors, 0); // delegates to the three-argument overload with byteorder 0
     }
 
     public static PyTuple utf_16_decode(String str, String errors,
                                         int byteorder) {
         int[] bo = new int[] { byteorder };
-        return codec_tuple(decode_UTF16(str, errors, bo), str.length());
+        return decode_tuple(decode_UTF16(str, errors, bo), str.length()); // decode_UTF16 writes the final byte order into bo[0]; it is not returned here
     }
 
     public static PyTuple utf_16_le_decode(String str) {
@@ -307,7 +397,7 @@
 
     public static PyTuple utf_16_le_decode(String str, String errors) {
         int[] bo = new int[] { -1 };
-        return codec_tuple(decode_UTF16(str, errors, bo), str.length());
+        return decode_tuple(decode_UTF16(str, errors, bo), str.length()); // starts with bo = -1: decode_UTF16 treats the second char of each pair as the high byte
     }
 
     public static PyTuple utf_16_be_decode(String str) {
@@ -316,7 +406,7 @@
 
     public static PyTuple utf_16_be_decode(String str, String errors) {
         int[] bo = new int[] { 1 };
-        return codec_tuple(decode_UTF16(str, errors, bo), str.length());
+        return decode_tuple(decode_UTF16(str, errors, bo), str.length()); // starts with bo = 1: decode_UTF16 treats the first char of each pair as the high byte
     }
 
     public static PyTuple utf_16_ex_decode(String str) {
@@ -338,67 +428,70 @@
         });
     }
 
-    private static String decode_UTF16(String str, String errors,
+    private static String decode_UTF16(String str, // input is consumed two chars at a time, each pair forming one 16-bit unit
+                                       String errors,
                                        int[] byteorder) {
         int bo = 0;
-        if (byteorder != null)
-             bo = byteorder[0];
-
+        if(byteorder != null) // null is accepted; bo then stays 0
+            bo = byteorder[0];
         int size = str.length();
-
-        if (size % 2 != 0)
-            codecs.decoding_error("UTF16", null, errors, "truncated data");
-
-        StringBuffer v = new StringBuffer(size/2);
-
-        for (int i = 0; i < size; i += 2) {
+        StringBuffer v = new StringBuffer(size / 2); // initial capacity: one output char per input pair
+        for(int i = 0; i < size; i += 2) {
             char ch1 = str.charAt(i);
-            char ch2 = str.charAt(i+1);
-            if (ch1 == 0xFE && ch2 == 0xFF) {
+            if(i + 1 == size) { // odd trailing char: there is no partner to pair it with
+                i = codecs.insertReplacementAndGetResume(v, // the helper's return value becomes the loop index; the for-loop then adds 2
+                                                         errors,
+                                                         "utf-16",
+                                                         str,
+                                                         i,
+                                                         i + 1,
+                                                         "truncated data");
+                continue;
+            }
+            char ch2 = str.charAt(i + 1);
+            if(ch1 == 0xFE && ch2 == 0xFF) { // FE FF at any pair position (not only i == 0) sets bo = 1; nothing is appended
                 bo = 1;
                 continue;
-            } else if (ch1 == 0xFF && ch2 == 0xFE) {
+            } else if(ch1 == 0xFF && ch2 == 0xFE) { // FF FE at any pair position sets bo = -1; nothing is appended
                 bo = -1;
                 continue;
             }
-
             char ch;
-            if (bo == -1)
-                ch = (char) (ch2 << 8 | ch1);
+            if(bo == -1) // -1: the second char of the pair is the high byte
+                ch = (char)(ch2 << 8 | ch1);
             else
-                ch = (char) (ch1 << 8 | ch2);
-
-            if (ch < 0xD800 || ch > 0xDFFF) {
+                ch = (char)(ch1 << 8 | ch2); // any other bo value: the first char of the pair is the high byte
+            if(ch < 0xD800 || ch > 0xDFFF) { // outside 0xD800..0xDFFF: append the unit as-is
                 v.append(ch);
                 continue;
             }
-
-
-            /* UTF-16 code pair: */
-            if (i == size-1) {
-                codecs.decoding_error("UTF-16", v, errors,
-                                      "unexpected end of data");
-                continue;
-            }
-
             ch = str.charAt(++i);
-            if (0xDC00 <= ch && ch <= 0xDFFF) {
-                ch = str.charAt(++i);
-                if (0xD800 <= ch && ch <= 0xDBFF)
-                    /* This is valid data (a UTF-16 surrogate pair), but
-                       we are not able to store this information since our
-                       Py_UNICODE type only has 16 bits... this might
-                       change someday, even though it's unlikely. */
-                    codecs.decoding_error("UTF-16", v, errors,
-                                          "code pairs are not supported");
+            if(0xDC00 <= ch && ch <= 0xDFFF) { // NOTE(review): ch is now one raw input char, not a combined unit - confirm intent
+                ch2 = str.charAt(++i); // NOTE(review): no bounds check is visible before this read - confirm i < size here
+                if(0xD800 <= ch2 && ch2 <= 0xDBFF) { // both chars are kept, appended in the order they were read
+                    v.append(ch);
+                    v.append(ch2);
+                    continue;
+                }
+                i = codecs.insertReplacementAndGetResume(v, // same helper as the truncated-data case, with a different message
+                                                         errors,
+                                                         "utf-16",
+                                                         str,
+                                                         i,
+                                                         i + 1,
+                                                         "illegal UTF-16 surrogate");
                 continue;
             }
-            codecs.decoding_error("UTF-16", v, errors, "illegal encoding");
+            i = codecs.insertReplacementAndGetResume(v, // reached when the combined unit was in 0xD800..0xDFFF but the follow-up test failed
+                                                     errors,
+                                                     "utf-16",
+                                                     str,
+                                                     i,
+                                                     i + 1,
+                                                     "illegal encoding");
         }
-
-        if (byteorder != null)
+        if(byteorder != null) // report the final byte order back to the caller through the array
             byteorder[0] = bo;
-
         return v.toString();
     }
 
@@ -413,7 +506,7 @@
 
     public static PyTuple raw_unicode_escape_encode(String str,
                                                    String errors) {
-        return codec_tuple(codecs.PyUnicode_EncodeRawUnicodeEscape(str,
+        return encode_tuple(codecs.PyUnicode_EncodeRawUnicodeEscape(str, // encoded text is paired with the input length, str.length()
                                                              errors, false),
                            str.length());
     }
@@ -425,7 +518,7 @@
 
     public static PyTuple raw_unicode_escape_decode(String str,
                                                     String errors) {
-        return codec_tuple(codecs.PyUnicode_DecodeRawUnicodeEscape(str,
+        return decode_tuple(codecs.PyUnicode_DecodeRawUnicodeEscape(str, // decoded text is paired with the input length, str.length()
                                                              errors),
                            str.length());
     }
@@ -440,7 +533,7 @@
     }
 
     public static PyTuple unicode_escape_encode(String str, String errors) {
-        return codec_tuple(PyString.encode_UnicodeEscape(str, false),
+        return encode_tuple(PyString.encode_UnicodeEscape(str, false), // note: the errors argument is not forwarded to the encoder
                            str.length());
     }
 
@@ -450,12 +543,13 @@
 
     public static PyTuple unicode_escape_decode(String str, String errors) {
         int n = str.length();
-        return codec_tuple(PyString.decode_UnicodeEscape(str,
-                                                     0, n, errors, true), n);
+        return decode_tuple(PyString.decode_UnicodeEscape(str, // decoding is delegated to PyString
+                                                          0, // presumably the start of the range to decode - confirm against PyString
+                                                          n, // n = str.length(); presumably the end of the range - confirm
+                                                          errors,
+                                                          true), n); // second tuple argument is the input length
     }
 
-
-
     /* --- UnicodeInternal Codec ------------------------------------------ */
 
 
@@ -464,7 +558,7 @@
     }
 
     public static PyTuple unicode_internal_encode(String str, String errors) {
-        return codec_tuple(str, str.length());
+        return encode_tuple(str, str.length()); // str is passed through unchanged; errors is not used
     }
 
     public static PyTuple unicode_internal_decode(String str) {
@@ -472,7 +566,7 @@
     }
 
     public static PyTuple unicode_internal_decode(String str, String errors) {
-        return codec_tuple(str, str.length());
+        return decode_tuple(str, str.length()); // str is passed through unchanged; errors is not used
     }
 
 }