Index: src/org/python/core/PyString.java
===================================================================
--- src/org/python/core/PyString.java (revision 7099)
+++ src/org/python/core/PyString.java (revision )
@@ -3,6 +3,10 @@
 
 import java.math.BigInteger;
 
+import org.python.core.stringlib.FieldNameIterator;
+import org.python.core.stringlib.InternalFormatSpec;
+import org.python.core.stringlib.InternalFormatSpecParser;
+import org.python.core.stringlib.MarkupIterator;
 import org.python.core.util.ExtraMath;
 import org.python.core.util.StringUtil;
 import org.python.expose.ExposedMethod;
@@ -2490,6 +2494,156 @@
         return codecs.decode(this, encoding, errors);
     }
 
+    @ExposedMethod(doc = BuiltinDocs.str__formatter_parser_doc)
+    final PyObject str__formatter_parser() {
+        return new MarkupIterator(getString());
+    }
+
+    @ExposedMethod(doc = BuiltinDocs.str__formatter_field_name_split_doc)
+    final PyObject str__formatter_field_name_split() {
+        FieldNameIterator iterator = new FieldNameIterator(getString());
+        Object headObj = iterator.head();
+        PyObject head = headObj instanceof Integer
+                ? new PyInteger((Integer) headObj)
+                : new PyString((String) headObj);
+        return new PyTuple(head, iterator);
+    }
+
+    @ExposedMethod(doc = BuiltinDocs.str_format_doc)
+    final PyObject str_format(PyObject[] args, String[] keywords) {
+        try {
+            return new PyString(buildFormattedString(getString(), args, keywords));
+        } catch(IllegalArgumentException e) {
+            throw Py.ValueError(e.getMessage());
+        }
+    }
+
+    private String buildFormattedString(String value, PyObject[] args, String[] keywords) {
+        StringBuilder result = new StringBuilder();
+        MarkupIterator it = new MarkupIterator(value);
+        while (true) {
+            MarkupIterator.Chunk chunk = it.nextChunk();
+            if (chunk == null) {
+                break;
+            }
+            result.append(chunk.literalText);
+            if (chunk.fieldName.length() > 0) {
+                outputMarkup(result, chunk, args, keywords);
+            }
+        }
+        return result.toString();
+    }
+
+    private void outputMarkup(StringBuilder result, MarkupIterator.Chunk chunk,
+                              PyObject[] args, String[] keywords) {
+        PyObject fieldObj = getFieldObject(chunk.fieldName, args, keywords);
+        if (fieldObj == null) {
+            return;
+        }
+        if ("r".equals(chunk.conversion)) {
+            fieldObj = fieldObj.__repr__();
+        }
+        else if ("s".equals(chunk.conversion)) {
+            fieldObj = fieldObj.__str__();
+        }
+        else if (chunk.conversion != null) {
+            throw Py.ValueError("Unknown conversion specifier " + chunk.conversion);
+        }
+        String formatSpec = chunk.formatSpec;
+        if (chunk.formatSpecNeedsExpanding) {
+            formatSpec = buildFormattedString(formatSpec, args, keywords);
+        }
+        renderField(fieldObj, formatSpec, result);
+    }
+
+    private PyObject getFieldObject(String fieldName, PyObject[] args, String[] keywords) {
+        FieldNameIterator iterator = new FieldNameIterator(fieldName);
+        Object head = iterator.head();
+        PyObject obj = null;
+        int positionalCount = args.length - keywords.length;
+        if (head instanceof Integer) {
+            int index = (Integer) head;
+            // Reject negative indices too, so they cannot reach the raw array access below.
+            if (index < 0 || index >= positionalCount) {
+                throw Py.IndexError("tuple index out of range");
+            }
+            obj = args[index];
+        }
+        else {
+            for (int i = 0; i < keywords.length; i++) {
+                if (keywords[i].equals(head)) {
+                    obj = args[positionalCount+i];
+                    break;
+                }
+            }
+            if (obj == null) {
+                throw Py.KeyError((String) head);
+            }
+        }
+        if (obj != null) {
+            while (true) {
+                FieldNameIterator.Chunk chunk = iterator.nextChunk();
+                if (chunk == null) {
+                    break;
+                }
+                if (chunk.is_attr) {
+                    obj = obj.__getattr__((String) chunk.value);
+                }
+                else {
+                    PyObject key = chunk.value instanceof String
+                            ? new PyString((String) chunk.value)
+                            : new PyInteger((Integer) chunk.value);
+                    obj = obj.__getitem__(key);
+                }
+                if (obj == null) break;
+            }
+        }
+        return obj;
+    }
+
+    private void renderField(PyObject fieldObj, String formatSpec, StringBuilder result) {
+        PyString formatSpecStr = formatSpec == null ? Py.EmptyString : new PyString(formatSpec);
+        result.append(fieldObj.__format__(formatSpecStr).asString());
+    }
+
+    @Override
+    public PyObject __format__(PyObject format_spec) {
+        return str___format__(format_spec);
+    }
+
+    @ExposedMethod(doc = BuiltinDocs.str___format___doc)
+    final PyObject str___format__(PyObject format_spec) {
+        if (format_spec instanceof PyString) {
+            String result;
+            try {
+                String specString = ((PyString) format_spec).getString();
+                InternalFormatSpec spec = new InternalFormatSpecParser(specString).parse();
+                result = formatString(getString(), spec);
+            } catch (IllegalArgumentException e) {
+                throw Py.ValueError(e.getMessage());
+            }
+            if (format_spec instanceof PyUnicode) {
+                return new PyUnicode(result);
+            }
+            return new PyString(result);
+        }
+        throw Py.TypeError("__format__ requires str or unicode");
+    }
+
+    /**
+     * Internal implementation of str.__format__()
+     *
+     * @param text the text to format
+     * @param spec the PEP 3101 formatting specification
+     * @return the result of the formatting
+     */
+    public static String formatString(String text, InternalFormatSpec spec) {
+        if (spec.precision >= 0 && text.length() > spec.precision) {
+            text = text.substring(0, spec.precision);
+        }
+        return spec.pad(text, '<', 0);
+    }
+
     /* arguments' conversion helper */
 
     @Override
Index: src/org/python/core/PyInteger.java
===================================================================
--- src/org/python/core/PyInteger.java (revision 7099)
+++ src/org/python/core/PyInteger.java (revision )
@@ -7,6 +7,8 @@
 import java.io.Serializable;
 import java.math.BigInteger;
 
+import org.python.core.stringlib.InternalFormatSpec;
+import org.python.core.stringlib.InternalFormatSpecParser;
 import org.python.expose.ExposedMethod;
 import org.python.expose.ExposedNew;
 import org.python.expose.ExposedType;
@@ -894,6 +896,125 @@
     }
 
     @Override
+    public PyObject __format__(PyObject format_spec) {
+        return int___format__(format_spec);
+    }
+
+    @ExposedMethod(doc = BuiltinDocs.int___format___doc)
+    final PyObject int___format__(PyObject format_spec) {
+        return formatImpl(getValue(), format_spec);
+    }
+
+    static PyObject formatImpl(Object value, PyObject format_spec) {
+        if (format_spec instanceof PyString) {
+            String result;
+            try {
+                String specString = ((PyString) format_spec).getString();
+                InternalFormatSpec spec = new InternalFormatSpecParser(specString).parse();
+                result = formatIntOrLong(value, spec);
+            } catch (IllegalArgumentException e) {
+                throw Py.ValueError(e.getMessage());
+            }
+            if (format_spec instanceof PyUnicode) {
+                return new PyUnicode(result);
+            }
+            return new PyString(result);
+        }
+        throw Py.TypeError("__format__ requires str or unicode");
+    }
+
+    /**
+     * Formats an integer or long number according to a PEP-3101 format specification.
+     *
+     * @param value Integer or BigInteger object specifying the value to format.
+     * @param spec parsed PEP-3101 format specification.
+     * @return result of the formatting.
+     * @throws IllegalArgumentException if the specification carries a precision, if a sign is
+     *             combined with type 'c', or if a 'c' value lies outside 0..0xffff.
+     */
+    public static String formatIntOrLong(Object value, InternalFormatSpec spec) {
+        if (spec.precision != -1) {
+            throw new IllegalArgumentException("Precision not allowed in integer format specifier");
+        }
+        int sign;
+        if (value instanceof Integer) {
+            int intValue = (Integer) value;
+            sign = intValue < 0 ? -1 : intValue == 0 ? 0 : 1;
+        }
+        else {
+            sign = ((BigInteger) value).signum();
+        }
+        // The sign and base prefix are kept apart from the digits so that the prefix
+        // lands after the sign ("-0xff") and '=' alignment pads between prefix and digits.
+        String prefix = "";
+        String strValue;
+        if (spec.type == 'c') {
+            if (spec.sign != '\0') {
+                throw new IllegalArgumentException("Sign not allowed with " +
+                                                   "integer format specifier 'c'");
+            }
+            if (value instanceof Integer) {
+                int intValue = (Integer) value;
+                if (intValue < 0 || intValue > 0xffff) {
+                    throw new IllegalArgumentException("%c arg not in range(0x10000)");
+                }
+                strValue = Character.toString((char) intValue);
+            }
+            else {
+                BigInteger bigInt = (BigInteger) value;
+                // bitLength (not bitCount) bounds the magnitude: 2**32 has a single set bit.
+                if (bigInt.signum() < 0 || bigInt.bitLength() > 16) {
+                    throw new IllegalArgumentException("%c arg not in range(0x10000)");
+                }
+                strValue = Character.toString((char) bigInt.intValue());
+            }
+        } else {
+            int radix = 10;
+            if (spec.type == 'o') {
+                radix = 8;
+            } else if (spec.type == 'x' || spec.type == 'X') {
+                radix = 16;
+            } else if (spec.type == 'b') {
+                radix = 2;
+            }
+
+            // TODO locale-specific formatting for 'n'
+            // Only the magnitude is converted here; the sign goes into the prefix.
+            if (value instanceof BigInteger) {
+                strValue = ((BigInteger) value).abs().toString(radix);
+            }
+            else {
+                // Widen to long first so that Integer.MIN_VALUE can be negated.
+                strValue = Long.toString(Math.abs(((Integer) value).longValue()), radix);
+            }
+
+            if (sign < 0) {
+                prefix = "-";
+            } else if (spec.sign == '+') {
+                prefix = "+";
+            } else if (spec.sign == ' ') {
+                prefix = " ";
+            }
+            if (spec.alternate) {
+                if (radix == 2)
+                    prefix += "0b";
+                else if (radix == 8)
+                    prefix += "0o";
+                else if (radix == 16)
+                    prefix += "0x";
+            }
+            if (spec.type == 'X') {
+                prefix = prefix.toUpperCase();
+                strValue = strValue.toUpperCase();
+            }
+        }
+        if (spec.align == '=') {
+            return prefix + spec.pad(strValue, '>', prefix.length());
+        }
+        return spec.pad(prefix + strValue, '>', 0);
+    }
+
+    @Override
     public boolean isIndex() {
         return true;
     }
Index: src/org/python/core/PyObject.java
===================================================================
--- src/org/python/core/PyObject.java (revision 7099)
+++ src/org/python/core/PyObject.java (revision )
@@ -1694,6 +1694,16 @@
         return false;
     }
 
+    public PyObject __format__(PyObject format_spec) {
+        return object___format__(format_spec);
+    }
+
+    @ExposedMethod(doc = BuiltinDocs.object___format___doc)
+    final PyObject object___format__(PyObject format_spec) {
+        PyString str = __str__();
+        return str.__format__(format_spec);
+    }
+
     /**
      * Implements boolean not
      *
Index: src/org/python/core/stringlib/FieldNameIterator.java
===================================================================
--- src/org/python/core/stringlib/FieldNameIterator.java (revision )
+++ src/org/python/core/stringlib/FieldNameIterator.java (revision )
@@ -0,0 +1,133 @@
+package org.python.core.stringlib;
+
+import org.python.core.*;
+import org.python.expose.ExposedMethod;
+import org.python.expose.ExposedType;
+
+/**
+ * Provides an implementation of str._formatter_field_name_split()
+ */
+@ExposedType(name = "fieldnameiterator", base = PyObject.class, isBaseType = false)
+public class FieldNameIterator extends PyObject {
+    private String markup;
+    private Object head;
+    private int index;
+
+    public FieldNameIterator(String markup) {
+        this.markup = markup;
+        this.index = nextDotOrBracket(markup);
+        String headStr = markup.substring(0, index);
+        try {
+            this.head = Integer.parseInt(headStr);
+        } catch (NumberFormatException e) {
+            this.head = headStr;
+        }
+    }
+
+    @Override
+    public PyObject __iter__() {
+        return this;
+    }
+
+    @ExposedMethod
+    public PyObject fieldnameiterator___iter__() {
+        return this;
+    }
+
+    public PyObject __iternext__() {
+        // Surface malformed field names to Python as ValueError, as MarkupIterator does.
+        Chunk chunk;
+        try {
+            chunk = nextChunk();
+        } catch (IllegalArgumentException e) {
+            throw Py.ValueError(e.getMessage());
+        }
+        if (chunk == null) {
+            return null;
+        }
+        PyObject[] elements = new PyObject[2];
+        elements [0] = new PyBoolean(chunk.is_attr);
+        if (chunk.value instanceof Integer) {
+            elements [1] = new PyInteger((Integer) chunk.value);
+        }
+        else {
+            elements [1] = new PyString((String) chunk.value);
+        }
+        return new PyTuple(elements);
+    }
+
+    @ExposedMethod
+    public PyObject fieldnameiterator___iternext__() {
+        return __iternext__();
+    }
+
+    private int nextDotOrBracket(String markup) {
+        int dotPos = markup.indexOf('.', index);
+        if (dotPos < 0) dotPos = markup.length();
+        int bracketPos = markup.indexOf('[', index);
+        if (bracketPos < 0) bracketPos = markup.length();
+        return Math.min(dotPos, bracketPos);
+    }
+
+    public Object head() {
+        return head;
+    }
+
+    /**
+     * Parses the next attribute or item access following the head of the field name.
+     *
+     * @return the parsed chunk, or null when the field name is exhausted
+     * @throws IllegalArgumentException if the field name is malformed
+     */
+    public Chunk nextChunk() {
+        if (index == markup.length()) {
+            return null;
+        }
+        Chunk chunk = new Chunk();
+        if (markup.charAt(index) == '[') {
+            parseItemChunk(chunk);
+        }
+        else if (markup.charAt(index) == '.') {
+            parseAttrChunk(chunk);
+        }
+        else {
+            throw new IllegalArgumentException(
+                    "Only '.' or '[' may follow ']' in format field specifier");
+        }
+        return chunk;
+    }
+
+    private void parseItemChunk(Chunk chunk) {
+        chunk.is_attr = false;
+        int endBracket = markup.indexOf(']', index+1);
+        if (endBracket < 0) {
+            throw new IllegalArgumentException("Missing ']' in format string");
+        }
+        String itemValue = markup.substring(index+1, endBracket);
+        if (itemValue.length() == 0) {
+            throw new IllegalArgumentException("Empty attribute in format string");
+        }
+        try {
+            chunk.value = Integer.parseInt(itemValue);
+        } catch (NumberFormatException e) {
+            chunk.value = itemValue;
+        }
+        index = endBracket+1;
+    }
+
+    private void parseAttrChunk(Chunk chunk) {
+        index++; // skip dot
+        chunk.is_attr = true;
+        int pos = nextDotOrBracket(markup);
+        if (pos == index) {
+            throw new IllegalArgumentException("Empty attribute in format string");
+        }
+        chunk.value = markup.substring(index, pos);
+        index = pos;
+    }
+
+    public static class Chunk {
+        public boolean is_attr;
+        public Object value; // Integer or String
+    }
+}
Index: src/org/python/core/PyInstance.java
===================================================================
--- src/org/python/core/PyInstance.java (revision 6894)
+++ src/org/python/core/PyInstance.java (revision )
@@ -796,6 +796,19 @@
                                          ret.getType().fastGetName()));
     }
 
+    @Override
+    public PyObject __format__(PyObject format_spec) {
+        return instance___format__(format_spec);
+    }
+
+    @ExposedMethod
+    final PyObject instance___format__(PyObject format_spec) {
+        PyObject func = __findattr__("__format__");
+        if (func == null)
+            return super.__format__(format_spec);
+        return func.__call__(format_spec);
+    }
+
     // Generated by make_binops.py
 
     // Unary ops
Index: CoreExposed.includes
===================================================================
--- CoreExposed.includes (revision 6976)
+++ CoreExposed.includes (revision )
@@ -44,6 +44,8 @@
 org/python/core/PyType.class
 org/python/core/PyUnicode.class
 org/python/core/PyXRange.class
+org/python/core/stringlib/MarkupIterator.class
+org/python/core/stringlib/FieldNameIterator.class
 org/python/modules/PyStruct.class
 org/python/modules/PyTeeIterator.class
 org/python/jsr223/PyScriptEngineScope.class
Index: src/org/python/core/stringlib/InternalFormatSpecParser.java
===================================================================
--- src/org/python/core/stringlib/InternalFormatSpecParser.java (revision )
+++ src/org/python/core/stringlib/InternalFormatSpecParser.java (revision )
@@ -0,0 +1,95 @@
+package org.python.core.stringlib;
+
+/**
+ * Parser for PEP-3101 field format specifications.
+ */
+public class InternalFormatSpecParser {
+    private String spec;
+    private int index;
+
+    public InternalFormatSpecParser(String spec) {
+        this.spec = spec;
+        this.index = 0;
+    }
+
+    private static boolean isAlign(char c) {
+        switch(c) {
+            case '<':
+            case '>':
+            case '=':
+            case '^':
+                return true;
+            default:
+                return false;
+        }
+    }
+
+    /**
+     * Parses the specification passed to the constructor.
+     *
+     * @return the parsed specification
+     * @throws IllegalArgumentException if the specification is malformed
+     */
+    public InternalFormatSpec parse() {
+        InternalFormatSpec result = new InternalFormatSpec();
+        // A fill character may itself be an alignment character, so the
+        // two-character form has to be tried first.
+        if (spec.length() >= 2 && isAlign(spec.charAt(1))) {
+            result.fill_char = spec.charAt(0);
+            result.align = spec.charAt(1);
+            index += 2;
+        }
+        else if (spec.length() >= 1 && isAlign(spec.charAt(0))) {
+            result.align = spec.charAt(index);
+            index++;
+        }
+        if (isAt("+- ")) {
+            result.sign = spec.charAt(index);
+            index++;
+        }
+        if (isAt("#")) {
+            result.alternate = true;
+            index++;
+        }
+        // A leading '0' is a zero-padding flag only when no fill character was given;
+        // it must not override an explicit alignment.
+        if (result.fill_char == '\0' && isAt("0")) {
+            result.fill_char = '0';
+            if (result.align == '\0') {
+                result.align = '=';
+            }
+            index++;
+        }
+        result.width = getInteger();
+        if (isAt(".")) {
+            index++;
+            result.precision = getInteger();
+            if (result.precision == -1) {
+                throw new IllegalArgumentException("Format specifier missing precision");
+            }
+        }
+        if (index < spec.length()) {
+            result.type = spec.charAt(index);
+            if (index + 1 != spec.length()) {
+                throw new IllegalArgumentException("Invalid conversion specification");
+            }
+        }
+        return result;
+    }
+
+    private int getInteger() {
+        int value = 0;
+        boolean empty = true;
+        while (index < spec.length() && spec.charAt(index) >= '0' && spec.charAt(index) <= '9') {
+            value = value * 10 + spec.charAt(index) - '0';
+            index++;
+            empty = false;
+        }
+        if (empty) return -1;
+        return value;
+    }
+
+    private boolean isAt(String chars) {
+        return index < spec.length() && chars.indexOf(spec.charAt(index)) >= 0;
+    }
+}
Index: src/org/python/core/__builtin__.java
===================================================================
--- src/org/python/core/__builtin__.java (revision 7221)
+++ src/org/python/core/__builtin__.java (revision )
@@ -358,6 +358,7 @@
         dict.__setitem__("sorted", new SortedFunction());
         dict.__setitem__("all", new AllFunction());
         dict.__setitem__("any", new AnyFunction());
+        dict.__setitem__("format", new FormatFunction());
     }
 
     public static PyObject abs(PyObject o) {
@@ -1303,6 +1304,25 @@
     }
 }
 
+class FormatFunction extends PyBuiltinFunctionNarrow {
+    FormatFunction() {
+        super("format", 1, 2,
+              "format(value[, format_spec]) -> string\n\n" +
+              "Returns value.__format__(format_spec)\n" +
+              "format_spec defaults to \"\"");
+    }
+
+    @Override
+    public PyObject __call__(PyObject arg1) {
+        return __call__(arg1, new PyString(""));
+    }
+
+    @Override
+    public PyObject __call__(PyObject arg1, PyObject arg2) {
+        return arg1.__format__(arg2);
+    }
+}
+
 class MaxFunction extends PyBuiltinFunction {
     MaxFunction() {
         super("max",
Index: src/org/python/core/PyLong.java
===================================================================
--- src/org/python/core/PyLong.java (revision 7100)
+++ src/org/python/core/PyLong.java (revision )
@@ -964,6 +964,16 @@
     }
 
     @Override
+    public PyObject __format__(PyObject format_spec) {
+        return long___format__(format_spec);
+    }
+
+    @ExposedMethod(doc = BuiltinDocs.long___format___doc)
+    final PyObject long___format__(PyObject format_spec) {
+        return PyInteger.formatImpl(getValue(), format_spec);
+    }
+
+    @Override
     public boolean isIndex() {
         return true;
     }
Index: src/org/python/core/stringlib/InternalFormatSpec.java
===================================================================
--- src/org/python/core/stringlib/InternalFormatSpec.java (revision )
+++ src/org/python/core/stringlib/InternalFormatSpec.java (revision )
@@ -0,0 +1,42 @@
+package org.python.core.stringlib;
+
+/**
+ * Parsed PEP-3101 format specification of a single field.
+ */
+public final class InternalFormatSpec {
+    public char fill_char;
+    public char align;
+    public boolean alternate;
+    public char sign;
+    public int width = -1;
+    public int precision = -1;
+    public char type;
+
+    public String pad(String value, char defaultAlign, int leaveWidth) {
+        int remaining = width - value.length() - leaveWidth;
+        if (remaining <= 0) {
+            return value;
+        }
+        StringBuilder result = new StringBuilder();
+        int leading = remaining;
+        int useAlign = align;
+        if (useAlign == 0) {
+            useAlign = defaultAlign;
+        }
+        if (useAlign == '^') {
+            leading = remaining/2;
+        }
+        else if (useAlign == '<') {
+            leading = 0;
+        }
+        char fill = fill_char != 0 ? fill_char : ' ';
+        for (int i = 0; i < leading; i++) {
+            result.append(fill);
+        }
+        result.append(value);
+        for (int i = 0; i < remaining-leading; i++) {
+            result.append(fill);
+        }
+        return result.toString();
+    }
+}
Index: src/org/python/core/stringlib/MarkupIterator.java
===================================================================
--- src/org/python/core/stringlib/MarkupIterator.java (revision )
+++ src/org/python/core/stringlib/MarkupIterator.java (revision )
@@ -0,0 +1,159 @@
+package org.python.core.stringlib;
+
+import org.python.core.*;
+import org.python.expose.ExposedMethod;
+import org.python.expose.ExposedType;
+
+/**
+ * Provides an implementation of str._formatter_parser()
+ */
+@ExposedType(name = "formatteriterator", base = PyObject.class, isBaseType = false)
+public class MarkupIterator extends PyObject {
+    private final String markup;
+    private int index;
+
+    public MarkupIterator(String markup) {
+        this.markup = markup;
+    }
+
+    @Override
+    public PyObject __iter__() {
+        return this;
+    }
+
+    @ExposedMethod
+    public PyObject formatteriterator___iter__() {
+        return this;
+    }
+
+    public PyObject __iternext__() {
+        Chunk chunk;
+        try {
+            chunk = nextChunk();
+        } catch (IllegalArgumentException e) {
+            throw Py.ValueError(e.getMessage());
+        }
+        if (chunk == null) {
+            return null;
+        }
+        PyObject[] elements = new PyObject[4];
+        elements[0] = new PyString(chunk.literalText);
+        elements[1] = new PyString(chunk.fieldName);
+        if (chunk.fieldName.length() > 0) {
+            elements[2] = chunk.formatSpec == null ? Py.EmptyString : new PyString(chunk.formatSpec);
+        }
+        else {
+            elements[2] = Py.None;
+        }
+        elements[3] = chunk.conversion == null ? Py.None : new PyString(chunk.conversion);
+        return new PyTuple(elements);
+    }
+
+    @ExposedMethod
+    public PyObject formatteriterator___iternext__() {
+        return __iternext__();
+    }
+
+    public Chunk nextChunk() {
+        if (index == markup.length()) {
+            return null;
+        }
+        Chunk result = new Chunk();
+        int pos = index;
+        while(true) {
+            pos = indexOfFirst(markup, pos, '{', '}');
+            if (pos >= 0 && pos < markup.length()-1 &&
+                markup.charAt(pos+1) == markup.charAt(pos)) {
+                pos += 2; // skip escaped bracket
+            }
+            else if (pos >= 0 && markup.charAt(pos) == '}') {
+                throw new IllegalArgumentException("Single '}' encountered in format string");
+            }
+            else {
+                break;
+            }
+        }
+        if (pos < 0) {
+            result.literalText = unescapeBraces(markup.substring(index));
+            result.fieldName = "";
+            index = markup.length();
+        }
+        else {
+            result.literalText = unescapeBraces(markup.substring(index, pos));
+            pos++;
+            int fieldStart = pos;
+            int count = 1;
+            while (pos < markup.length()) {
+                if (markup.charAt(pos) == '{') {
+                    count++;
+                    result.formatSpecNeedsExpanding = true;
+                }
+                else if (markup.charAt(pos) == '}') {
+                    count--;
+                    if (count == 0) {
+                        parseField(result, markup.substring(fieldStart, pos));
+                        pos++;
+                        break;
+                    }
+                }
+                pos++;
+            }
+            if (count > 0)
+                throw new IllegalArgumentException("Single '{' encountered in format string");
+            index = pos;
+        }
+        return result;
+    }
+
+    private String unescapeBraces(String substring) {
+        return substring.replace("{{", "{").replace("}}", "}");
+    }
+
+    private void parseField(Chunk result, String fieldMarkup) {
+        int pos = indexOfFirst(fieldMarkup, 0, '!', ':');
+        if (pos >= 0) {
+            result.fieldName = fieldMarkup.substring(0, pos);
+            if (fieldMarkup.charAt(pos) == '!') {
+                if (pos == fieldMarkup.length() - 1) {
+                    throw new IllegalArgumentException("end of format while " +
+                                                       "looking for conversion specifier");
+                }
+                result.conversion = fieldMarkup.substring(pos + 1, pos + 2);
+                pos += 2;
+                if (pos < fieldMarkup.length()) {
+                    if (fieldMarkup.charAt(pos) != ':') {
+                        throw new IllegalArgumentException("expected ':' " +
+                                                           "after conversion specifier");
+                    }
+                    result.formatSpec = fieldMarkup.substring(pos+1);
+                }
+            }
+            else {
+                result.formatSpec = fieldMarkup.substring(pos+1);
+            }
+        }
+        else {
+            result.fieldName = fieldMarkup;
+        }
+    }
+
+    private int indexOfFirst(String s, int start, char c1, char c2) {
+        int i1 = s.indexOf(c1, start);
+        int i2 = s.indexOf(c2, start);
+        if (i1 == -1) {
+            return i2;
+        }
+        if (i2 == -1) {
+            return i1;
+        }
+        return Math.min(i1, i2);
+    }
+
+    public static final class Chunk {
+        public String literalText;
+        public String fieldName;
+        public String formatSpec;
+        public String conversion;
+        public boolean formatSpecNeedsExpanding;
+    }
+}
Index: tests/java/org/python/core/StringFormatTest.java
===================================================================
--- tests/java/org/python/core/StringFormatTest.java (revision )
+++ tests/java/org/python/core/StringFormatTest.java (revision )
@@ -0,0 +1,216 @@
+package org.python.core;
+
+import junit.framework.TestCase;
+import org.python.core.stringlib.FieldNameIterator;
+import org.python.core.stringlib.InternalFormatSpec;
+import org.python.core.stringlib.InternalFormatSpecParser;
+import org.python.core.stringlib.MarkupIterator;
+
+/**
+ * Tests for internal bits and pieces of string.format implementation.
+ */
+public class StringFormatTest extends TestCase {
+    public void testInternalFormatSpec() {
+        InternalFormatSpec spec = new InternalFormatSpecParser("x").parse();
+        assertEquals('x', spec.type);
+
+        spec = new InternalFormatSpecParser("