Index: src/org/python/core/PyString.java
===================================================================
--- src/org/python/core/PyString.java (revision 4811)
+++ src/org/python/core/PyString.java (working copy)
@@ -2186,6 +2186,134 @@
         }
         return super.unsupportedopMessage(op, o2);
     }
+
+    /**
+     * Split on runs of whitespace, scanning from the right, with no limit
+     * on the number of splits.
+     */
+    public PyList rsplit() {
+        return str_rsplit(null, -1);
+    }
+
+    /**
+     * Split on <code>sep</code>, scanning from the right, with no limit on
+     * the number of splits.  A null <code>sep</code> splits on whitespace.
+     */
+    public PyList rsplit(String sep) {
+        return str_rsplit(sep, -1);
+    }
+
+    /**
+     * Split on <code>sep</code>, scanning from the right, performing at most
+     * <code>maxsplit</code> splits.  A negative <code>maxsplit</code> means
+     * no limit.
+     */
+    public PyList rsplit(String sep, int maxsplit) {
+        return str_rsplit(sep, maxsplit);
+    }
+
+    /**
+     * Implementation of <code>str.rsplit</code>.  Raises ValueError (via
+     * <code>Py.ValueError</code>) when <code>sep</code> is the empty string.
+     *
+     * @param sep separator, or null to split on runs of whitespace
+     * @param maxsplit maximum number of splits; negative means no limit
+     * @return the pieces in left-to-right order
+     */
+    @ExposedMethod(defaults = {"null", "-1"})
+    final PyList str_rsplit(String sep, int maxsplit) {
+        if (sep != null) {
+            if (sep.length() == 0) {
+                throw Py.ValueError("empty separator");
+            }
+            PyList list = rsplitfields(sep, maxsplit);
+            list.reverse();
+            return list;
+        }
+
+        PyList list = new PyList();
+        char[] chars = string.toCharArray();
+
+        if (maxsplit < 0) {
+            maxsplit = chars.length;
+        }
+
+        int splits = 0;
+        int i = chars.length - 1;
+
+        while (splits < maxsplit) {
+            // Skip the whitespace to the right of the next word.
+            while (i > -1 && Character.isWhitespace(chars[i])) {
+                i--;
+            }
+            if (i == -1) {
+                break;
+            }
+
+            // Walk left to the whitespace character preceding that word.
+            int precedingWs = i;
+            while (precedingWs > -1 && !Character.isWhitespace(chars[precedingWs])) {
+                precedingWs--;
+            }
+            if (precedingWs == -1) {
+                // The word reaches the start of the string; the code after
+                // the loop adds it as the final piece.
+                break;
+            }
+
+            splits++;
+            list.append(fromSubstring(precedingWs + 1, i + 1));
+            i = precedingWs;
+        }
+
+        // Whatever remains becomes one piece: trailing whitespace is
+        // dropped, leading whitespace is kept.
+        while (i > -1 && Character.isWhitespace(chars[i])) {
+            i--;
+        }
+        if (i > -1) {
+            list.append(fromSubstring(0, i + 1));
+        }
+
+        // Pieces were collected right to left.  Appending and reversing once
+        // avoids the quadratic cost of list.insert(0, piece).
+        list.reverse();
+        return list;
+    }
+
+    /**
+     * Split on a non-empty separator, scanning from the right.
+     *
+     * @return the pieces in right-to-left order; str_rsplit reverses them
+     */
+    private PyList rsplitfields(String sep, int maxsplit) {
+        PyList list = new PyList();
+
+        int length = string.length();
+        if (maxsplit < 0) {
+            maxsplit = length + 1;
+        }
+
+        int lastbreak = length;
+        int splits = 0;
+        int index = length;
+        int sepLength = sep.length();
+
+        while (index > 0 && splits < maxsplit) {
+            // lastIndexOf returns at most index - sepLength, so each match
+            // ends at or before the previous break.
+            int i = string.lastIndexOf(sep, index - sepLength);
+            if (i < 0) {
+                break;
+            }
+            splits++;
+            list.append(fromSubstring(i + sepLength, lastbreak));
+            lastbreak = i;
+            index = i;
+        }
+        list.append(fromSubstring(0, lastbreak));
+        return list;
+    }
 }
 
 final class StringFormatter
Index: Lib/test/string_tests.py
===================================================================
--- Lib/test/string_tests.py (revision 4811)
+++ Lib/test/string_tests.py (working copy)
@@ -185,10 +185,104 @@
         self.checkequal(['a', 'b', 'c d'], 'a b c d', 'split', None, 2)
         self.checkequal(['a', 'b', 'c', 'd'], 'a b c d ', 'split')
         self.checkequal(['a', 'b', 'c', 'd'], 'a//b//c//d', 'split', '//')
-        #self.checkequal(['endcase ', ''], 'endcase test', 'split', 'test')
 
         self.checkraises(TypeError, 'hello', 'split', 42, 42, 42)
+        self.checkraises(ValueError, 'hello', 'split', "", 42)
+        self.checkraises(ValueError, 'hello', 'split', "")
 
+    def test_rsplit(self):
+        # Same shape as the split checks, but splitting from the right:
+        # with a maxsplit, the unsplit remainder is the leftmost piece.
+        self.checkequal(['this', 'is', 'the', 'rsplit', 'function'],
+            'this is the rsplit function', 'rsplit')
+        # by whitespace
+        self.checkequal(['a', 'b', 'c', 'd'], 'a b c d ', 'rsplit')
+        self.checkequal(['a b c', 'd'], 'a b c d', 'rsplit', None, 1)
+        self.checkequal(['a b', 'c', 'd'], 'a b c d', 'rsplit', None, 2)
+        self.checkequal(['a', 'b', 'c', 'd'], 'a b c d', 'rsplit', None, 3)
+        self.checkequal(['a', 'b', 'c', 'd'], 'a b c d', 'rsplit', None, 4)
+        self.checkequal(['a', 'b', 'c', 'd'], 'a b c d', 'rsplit', None,
+            sys.maxint-20)
+        self.checkequal(['a b c d'], 'a b c d', 'rsplit', None, 0)
+        self.checkequal(['a b c d'], 'a b c d ', 'rsplit', None, 0)
+        self.checkequal(['a b', 'c', 'd'], 'a b c d', 'rsplit', None, 2)
+
+        self.checkequal([], ' ', 'rsplit')
+        self.checkequal(['a'], ' a ', 'rsplit')
+        self.checkequal(['a', 'b'], ' a b ', 'rsplit')
+        self.checkequal([' a', 'b'], ' a b ', 'rsplit', None, 1)
+        self.checkequal([' a b','c'], ' a b c ', 'rsplit',
+            None, 1)
+        self.checkequal([' a', 'b', 'c'], ' a b c ', 'rsplit',
+            None, 2)
+        self.checkequal(['a', 'b'], '\n\ta \t\r b \v ', 'rsplit', None, 88)
+        aaa = ' a '*20
+        self.checkequal(['a']*20, aaa, 'rsplit')
+        self.checkequal([aaa[:-4]] + ['a'], aaa, 'rsplit', None, 1)
+        # adjacent words in aaa are two spaces apart; the remainder keeps them
+        self.checkequal([' a  a'] + ['a']*18, aaa, 'rsplit', None, 18)
+
+        # by a char
+        self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'rsplit', '|')
+        self.checkequal(['a|b|c', 'd'], 'a|b|c|d', 'rsplit', '|', 1)
+        self.checkequal(['a|b', 'c', 'd'], 'a|b|c|d', 'rsplit', '|', 2)
+        self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'rsplit', '|', 3)
+        self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'rsplit', '|', 4)
+        self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'rsplit', '|',
+            sys.maxint-100)
+        self.checkequal(['a|b|c|d'], 'a|b|c|d', 'rsplit', '|', 0)
+        self.checkequal(['a||b||c', '', 'd'], 'a||b||c||d', 'rsplit', '|', 2)
+        self.checkequal(['', ' begincase'], '| begincase', 'rsplit', '|')
+        self.checkequal(['endcase ', ''], 'endcase |', 'rsplit', '|')
+        self.checkequal(['', 'bothcase', ''], '|bothcase|', 'rsplit', '|')
+
+        self.checkequal(['a\x00\x00b', 'c', 'd'], 'a\x00\x00b\x00c\x00d', 'rsplit', '\x00', 2)
+
+        # NOTE(review): the next check was left disabled in the original
+        # patch without explanation; confirm whether it can be enabled.
+        #self.checkequal(['a']*20, ('a|'*20)[:-1], 'rsplit', '|')
+        self.checkequal(['a|a|a|a|a']+['a']*15,
+            ('a|'*20)[:-1], 'rsplit', '|', 15)
+
+        # by string
+        self.checkequal(['a', 'b', 'c', 'd'], 'a//b//c//d', 'rsplit', '//')
+        self.checkequal(['a//b//c', 'd'], 'a//b//c//d', 'rsplit', '//', 1)
+        self.checkequal(['a//b', 'c', 'd'], 'a//b//c//d', 'rsplit', '//', 2)
+        self.checkequal(['a', 'b', 'c', 'd'], 'a//b//c//d', 'rsplit', '//', 3)
+        self.checkequal(['a', 'b', 'c', 'd'], 'a//b//c//d', 'rsplit', '//', 4)
+        self.checkequal(['a', 'b', 'c', 'd'], 'a//b//c//d', 'rsplit', '//',
+            sys.maxint-5)
+        self.checkequal(['a//b//c//d'], 'a//b//c//d', 'rsplit', '//', 0)
+        self.checkequal(['a////b////c', '', 'd'], 'a////b////c////d', 'rsplit', '//', 2)
+        self.checkequal(['', ' begincase'], 'test begincase', 'rsplit', 'test')
+        self.checkequal(['endcase ', ''], 'endcase test', 'rsplit', 'test')
+        self.checkequal(['', ' bothcase ', ''], 'test bothcase test',
+            'rsplit', 'test')
+        self.checkequal(['ab', 'c'], 'abbbc', 'rsplit', 'bb')
+        self.checkequal(['', ''], 'aaa', 'rsplit', 'aaa')
+        self.checkequal(['aaa'], 'aaa', 'rsplit', 'aaa', 0)
+        self.checkequal(['ab', 'ab'], 'abbaab', 'rsplit', 'ba')
+        self.checkequal(['aaaa'], 'aaaa', 'rsplit', 'aab')
+        self.checkequal([''], '', 'rsplit', 'aaa')
+        self.checkequal(['aa'], 'aa', 'rsplit', 'aaa')
+        self.checkequal(['bbob', 'A'], 'bbobbbobbA', 'rsplit', 'bbobb')
+        self.checkequal(['', 'B', 'A'], 'bbobbBbbobbA', 'rsplit', 'bbobb')
+
+        self.checkequal(['a']*20, ('aBLAH'*20)[:-4], 'rsplit', 'BLAH')
+        self.checkequal(['a']*20, ('aBLAH'*20)[:-4], 'rsplit', 'BLAH', 19)
+        self.checkequal(['aBLAHa'] + ['a']*18, ('aBLAH'*20)[:-4],
+            'rsplit', 'BLAH', 18)
+
+        # mixed use of str and unicode
+        self.checkequal([u'a b', u'c', u'd'], 'a b c d', 'rsplit', u' ', 2)
+
+        # argument type
+        self.checkraises(TypeError, 'hello', 'rsplit', 42, 42, 42)
+
+        # null case
+        self.checkraises(ValueError, 'hello', 'rsplit', '')
+        self.checkraises(ValueError, 'hello', 'rsplit', '', 0)
+
     def test_strip(self):
         self.checkequal('hello', ' hello ', 'strip')
         self.checkequal('hello ', ' hello ', 'lstrip')