### Eclipse Workspace Patch 1.0
#P Jython
Index: Lib/unicodedata.py
===================================================================
--- Lib/unicodedata.py (revision 34915)
+++ Lib/unicodedata.py (working copy)
@@ -3,7 +3,13 @@
 import operator
 import os
 import java.lang.Character
+import StringIO
+try:
+    from imp import get_loader
+except ImportError:
+    from pkgutil import get_loader
+
 
 # this is intended as a stopgap measure; at the very least it should
 # be refactored so that we can avoid its slow startup time
 # requires java 6 for `normalize` function
@@ -34,49 +40,64 @@
             return float(a)/float(b)
         except:
             return None
+
+def _init_process_data(data):
+    for row in data:
+        cols = row.split(';')
+        codepoint = int(cols[0], 16)
+        name = cols[1]
+        data = (
+            cols[2],
+            get_int(cols[3]),
+            cols[4],
+            cols[5],
+            get_int(cols[6]),
+            get_int(cols[7]),
+            get_numeric(cols[8]),
+            get_yn(cols[9]))
+
+        if name.find('First') >= 0:
+            start = codepoint
+        elif name.find('Last') >= 0:
+            _segments.append((start, (start, codepoint), data))
+        else:
+            _names[name] = codepoint
+            _codepoints[codepoint] = data
 
-def init(path):
-    with open(os.path.join(path, 'UnicodeData.txt')) as data:
-        for row in data:
-            cols = row.split(';')
-            codepoint = int(cols[0], 16)
-            name = cols[1]
-            data = (
-                cols[2],
-                get_int(cols[3]),
-                cols[4],
-                cols[5],
-                get_int(cols[6]),
-                get_int(cols[7]),
-                get_numeric(cols[8]),
-                get_yn(cols[9]))
+def init(path):
+    loader = get_loader('unicodedata')
+    if loader:
+        _init_process_data(StringIO.StringIO(loader.get_data(os.path.join(path,'UnicodeData.txt'))))
+    else :
+        with open(os.path.join(path, 'UnicodeData.txt')) as data:
+            _init_process_data(data)
+
+def _init_east_asian_width_data(data):
+    for row in data:
+        if row.startswith('#'):
+            continue
+        row = row.partition('#')[0]
+        cols = row.split(';')
+        if len(cols) < 2:
+            continue
+        cr = cols[0].split('..')
+        width = cols[1].rstrip()
+        if len(cr) == 1:
+            codepoint = int(cr[0], 16)
+            _eaw[codepoint] = width
+        else:
+            start = int(cr[0], 16)
+            end = int(cr[1], 16)
+            _eaw_segments.append((start, (start, end), width))
+
 
-            if name.find('First') >= 0:
-                start = codepoint
-            elif name.find('Last') >= 0:
-                _segments.append((start, (start, codepoint), data))
-            else:
-                _names[name] = codepoint
-                _codepoints[codepoint] = data
-
 def init_east_asian_width(path):
-    with open(os.path.join(path, 'EastAsianWidth.txt')) as data:
-        for row in data:
-            if row.startswith('#'):
-                continue
-            row = row.partition('#')[0]
-            cols = row.split(';')
-            if len(cols) < 2:
-                continue
-            cr = cols[0].split('..')
-            width = cols[1].rstrip()
-            if len(cr) == 1:
-                codepoint = int(cr[0], 16)
-                _eaw[codepoint] = width
-            else:
-                start = int(cr[0], 16)
-                end = int(cr[1], 16)
-                _eaw_segments.append((start, (start, end), width))
+    loader = get_loader('unicodedata')
+    if loader:
+        _init_east_asian_width_data(StringIO.StringIO(loader.get_data(os.path.join(path,'EastAsianWidth.txt'))))
+    else :
+        with open(os.path.join(path, 'EastAsianWidth.txt')) as data:
+            _init_east_asian_width_data(data)
 
 # this doesn't work in general, but it should be ok in this case since