# Reconstructed from a whitespace-collapsed Mercurial patch whose headers were:
#
#   diff -r 6cee6fef06f0 Lib/encodings/shift_jis.py
#   --- /dev/null                     Thu Jan 01 00:00:00 1970 +0000
#   +++ b/Lib/encodings/shift_jis.py  Wed Jun 11 21:45:18 2014 -0600
#
# The companion change to Lib/test/test_codecs.py from the same patch is
# recorded at the bottom of this file.

"""Shift_JIS codec for Jython, implemented on top of java.nio.charset.

The module exposes the usual ``encodings`` package API (``getregentry``)
plus stateless ``encode``/``decode`` functions and incremental/stream
variants.  All conversion work is delegated to the JVM's ``Shift_JIS``
charset; this module only drives the coder loop and maps Java coder
errors onto Python's error-handler names.
"""

import codecs

from array import array
from java.lang import StringBuilder
from java.nio import ByteBuffer, CharBuffer
from java.nio.charset import Charset
from StringIO import StringIO


# Name reported in Unicode errors / CodecInfo, and the Java charset name.
_ENCODING = 'shift_jis'
_JAVA_CHARSET = 'Shift_JIS'

# Error-handler names implemented by this module.
_DECODE_ERRORS = ('ignore', 'replace', 'strict')
_ENCODE_ERRORS = ('ignore', 'replace', 'strict',
                  'backslashreplace', 'xmlcharrefreplace')

# Size of the reusable scratch buffer held by the incremental coders.
_SCRATCH_SIZE = 1024


def _check_errors(errors, supported):
    """Raise LookupError unless *errors* is one of the *supported* names.

    An explicit raise is used instead of ``assert`` so the check survives
    ``-O``; without it an unknown handler would silently act like 'ignore'.
    """
    if errors not in supported:
        raise LookupError('unknown error handler name %r' % (errors,))


def _process_decode_errors(encoding, input, result, errors, input_buffer, builder):
    """Apply the *errors* policy to a java.nio CoderResult from decoding.

    Does nothing unless ``result.isError()``.  For 'strict' a
    UnicodeDecodeError is raised; for 'replace' U+FFFD is appended to
    *builder*; for 'ignore' nothing is emitted.  In the non-raising cases
    *input_buffer* is advanced past the offending bytes so the caller's
    loop can resume.
    """
    if not result.isError():
        return

    start = input_buffer.position()
    bad_length = result.length()

    if errors == 'strict':
        # ``end`` of a UnicodeError is exclusive, hence no "- 1".
        raise UnicodeDecodeError(
            encoding,
            input,
            start, start + bad_length,
            'illegal multibyte sequence')

    if errors == 'replace':
        # One replacement per (up to) two bad bytes, rounded up so that a
        # single malformed byte still yields a visible U+FFFD.
        builder.append(u'\ufffd' * ((bad_length + 1) // 2))

    # advance past error bytes
    input_buffer.position(start + bad_length)


def _decode_available(decoder, input, input_buffer, output_buffer, builder,
                      errors, end_of_input):
    """Run *decoder* over *input_buffer* until it reports underflow.

    Decoded characters are moved from the scratch *output_buffer* into
    *builder* after every step; overflow simply triggers another pass and
    coder errors are routed through ``_process_decode_errors``.
    """
    while True:
        result = decoder.decode(input_buffer, output_buffer, end_of_input)
        produced = output_buffer.position()
        output_buffer.rewind()
        builder.append(output_buffer.subSequence(0, produced))
        if result.isUnderflow():
            break
        _process_decode_errors(_ENCODING, input, result, errors, input_buffer, builder)


def decode(input, errors='strict'):
    """Decode the Shift_JIS byte string *input*.

    Returns ``(text, len(input))``.  *errors* must be 'strict', 'ignore'
    or 'replace'; anything else raises LookupError.
    """
    _check_errors(errors, _DECODE_ERRORS)
    input_buffer = ByteBuffer.wrap(array('b', input))
    decoder = Charset.forName(_JAVA_CHARSET).newDecoder()
    output_buffer = CharBuffer.allocate(min(max(len(input) // 2, 256), 1024))
    builder = StringBuilder(int(decoder.averageCharsPerByte() * len(input)))

    _decode_available(decoder, input, input_buffer, output_buffer, builder,
                      errors, True)

    return builder.toString(), len(input)


def _get_unicode(input_buffer, result):
    """Return the characters at *input_buffer*'s position covered by *result*."""
    return input_buffer.subSequence(0, result.length()).toString()


def _backslash_escape(code_point):
    r"""Format *code_point* as a \xNN, \uNNNN or \UNNNNNNNN escape."""
    if code_point < 0x100:
        return '\\x%02x' % code_point
    if code_point < 0x10000:
        return '\\u%04x' % code_point
    return '\\U%08x' % code_point


def _process_encode_errors(encoding, input, result, errors, input_buffer, builder):
    """Apply the *errors* policy to a java.nio CoderResult from encoding.

    Does nothing unless ``result.isError()``.  For 'strict' a
    UnicodeEncodeError is raised; the other handlers write their
    substitute bytes (if any) to *builder*.  In the non-raising cases
    *input_buffer* is advanced past the offending characters.
    """
    if not result.isError():
        return

    start = input_buffer.position()
    bad_length = result.length()

    if errors == 'strict':
        # ``end`` of a UnicodeError is exclusive, hence no "- 1".
        raise UnicodeEncodeError(
            encoding,
            input,
            start, start + bad_length,
            'illegal multibyte sequence')
    elif errors == 'ignore':
        pass
    elif errors == 'replace':
        builder.write('?' * len(_get_unicode(input_buffer, result)))
    elif errors == 'xmlcharrefreplace':
        for c in _get_unicode(input_buffer, result):
            builder.write('&#%d;' % ord(c))
    elif errors == 'backslashreplace':
        for c in _get_unicode(input_buffer, result):
            builder.write(_backslash_escape(ord(c)))

    # advance past error characters
    input_buffer.position(start + bad_length)


def _to_char_buffer(input):
    """Copy *input* into a fresh CharBuffer positioned at its start."""
    # workaround non-BMP issues - need to get the exact count of chars,
    # not codepoints
    char_buffer = CharBuffer.allocate(StringBuilder(input).length())
    char_buffer.put(input)
    char_buffer.rewind()
    return char_buffer


def _encode_available(encoder, input, input_buffer, output_buffer, builder,
                      errors, end_of_input):
    """Run *encoder* over *input_buffer* until it reports underflow.

    Encoded bytes are moved from the scratch *output_buffer* into
    *builder* after every step; overflow simply triggers another pass and
    coder errors are routed through ``_process_encode_errors``.
    """
    while True:
        result = encoder.encode(input_buffer, output_buffer, end_of_input)
        produced = output_buffer.position()
        output_buffer.rewind()
        builder.write(output_buffer.array()[0:produced].tostring())
        if result.isUnderflow():
            break
        _process_encode_errors(_ENCODING, input, result, errors, input_buffer, builder)


def encode(input, errors='strict'):
    """Encode the unicode string *input* as Shift_JIS.

    Returns ``(bytes, len(input))``.  *errors* must be 'strict', 'ignore',
    'replace', 'backslashreplace' or 'xmlcharrefreplace'; anything else
    raises LookupError.
    """
    _check_errors(errors, _ENCODE_ERRORS)
    input_buffer = _to_char_buffer(input)
    encoder = Charset.forName(_JAVA_CHARSET).newEncoder()
    output_buffer = ByteBuffer.allocate(min(max(len(input) * 2, 256), 1024))
    builder = StringIO()

    _encode_available(encoder, input, input_buffer, output_buffer, builder,
                      errors, True)

    return builder.getvalue(), len(input)


### Codec APIs

class Codec(codecs.Codec):
    """Stateless codec delegating to the module-level functions."""

    def encode(self, input, errors='strict'):
        return encode(input, errors)

    def decode(self, input, errors='strict'):
        return decode(input, errors)


class IncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental Shift_JIS encoder holding one Java CharsetEncoder."""

    def __init__(self, errors='strict'):
        _check_errors(errors, _ENCODE_ERRORS)
        codecs.IncrementalEncoder.__init__(self, errors)
        self.encoder = Charset.forName(_JAVA_CHARSET).newEncoder()
        self.output_buffer = ByteBuffer.allocate(_SCRATCH_SIZE)

    def encode(self, input, final=False):
        """Encode *input* and return the resulting bytes.

        Pass ``final=True`` with the last chunk; the underlying Java
        encoder is then reset so the instance can be reused.
        """
        input_buffer = _to_char_buffer(input)
        self.output_buffer.rewind()
        builder = StringIO()

        _encode_available(self.encoder, input, input_buffer, self.output_buffer,
                          builder, self.errors, final)

        if final:
            # A Java coder that has seen end-of-input must be reset before
            # it may start a new operation.
            self.reset()
        return builder.getvalue()

    def reset(self):
        """Return the encoder to its initial state."""
        codecs.IncrementalEncoder.reset(self)
        self.encoder.reset()


class IncrementalDecoder(codecs.IncrementalDecoder):
    """Incremental Shift_JIS decoder holding one Java CharsetDecoder.

    Bytes the Java decoder could not consume yet (for example the lead
    byte of a two-byte character split across chunks) are kept and
    prepended to the next chunk.
    """

    def __init__(self, errors='strict'):
        _check_errors(errors, _DECODE_ERRORS)
        codecs.IncrementalDecoder.__init__(self, errors)
        self.decoder = Charset.forName(_JAVA_CHARSET).newDecoder()
        # The decoder writes characters, so the scratch buffer must be a
        # CharBuffer (not a ByteBuffer).
        self.output_buffer = CharBuffer.allocate(_SCRATCH_SIZE)
        self._pending = ''

    def decode(self, input, final=False):
        """Decode *input* (plus any carried-over bytes) and return the text.

        Pass ``final=True`` with the last chunk so that a truncated
        trailing sequence is reported through the error handler instead
        of being held back; the instance is reset afterwards.
        """
        data = self._pending + input
        input_buffer = ByteBuffer.wrap(array('b', data))
        builder = StringBuilder(int(self.decoder.averageCharsPerByte() * len(data)))
        self.output_buffer.rewind()

        _decode_available(self.decoder, data, input_buffer, self.output_buffer,
                          builder, self.errors, final)

        if final:
            self.reset()
        else:
            # Keep whatever the decoder left unconsumed for the next call.
            self._pending = data[input_buffer.position():]
        return builder.toString()

    def reset(self):
        """Drop carried-over bytes and reset the underlying Java decoder."""
        codecs.IncrementalDecoder.reset(self)
        self.decoder.reset()
        self._pending = ''

    def getstate(self):
        """Return ``(pending_bytes, 0)`` as described by the codecs API."""
        return (self._pending, 0)

    def setstate(self, state):
        """Restore the carried-over bytes from a ``getstate()`` tuple."""
        self._pending = state[0]


class StreamWriter(Codec, codecs.StreamWriter):
    pass


class StreamReader(Codec, codecs.StreamReader):
    pass


### encodings module API

def getregentry():
    """Return the CodecInfo record the ``encodings`` package looks up."""
    return codecs.CodecInfo(
        name='shift_jis',
        encode=Codec().encode,
        decode=Codec().decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamreader=StreamReader,
        streamwriter=StreamWriter,
    )


# ---------------------------------------------------------------------------
# Companion change carried by the same patch (not part of this module):
#
#   diff -r 6cee6fef06f0 Lib/test/test_codecs.py
#   --- a/Lib/test/test_codecs.py  Sun Jun 08 10:03:49 2014 +0100
#   +++ b/Lib/test/test_codecs.py  Wed Jun 11 21:45:18 2014 -0600
#   @@ -1347,7 +1347,7 @@
#        "punycode",
#        "raw_unicode_escape",
#        "rot_13",
#   -# FIXME: Jython issue 1066:    'shift_jis',
#   +    "shift_jis",
#    # FIXME: Jython issue 1066:    'shift_jis_2004',
#    # FIXME: Jython issue 1066:    'shift_jisx0213',
#        "tis_620",
# ---------------------------------------------------------------------------