diff --git a/.gitignore b/.gitignore index 4feb914f9..4448aa892 100644 --- a/.gitignore +++ b/.gitignore @@ -22,6 +22,7 @@ .externalToolBuilders/* .project .pydevproject +*.launch # Netbeans files nbproject diff --git a/src/org/python/antlr/GrammarActions.java b/src/org/python/antlr/GrammarActions.java index fc268e33d..9f98e98e0 100644 --- a/src/org/python/antlr/GrammarActions.java +++ b/src/org/python/antlr/GrammarActions.java @@ -445,9 +445,13 @@ class StringPair { private String s; private boolean unicode; - StringPair(String s, boolean unicode) { + StringPair(String s, boolean unicode, String encoding) { this.s = s; this.unicode = unicode; + // in case of no encoding, enforce unicode if the string does not fit into a real PyString + if (encoding == null && !unicode && !PyString.charsFitWidth(s, 7)) { + this.unicode = true; + } } String getString() { return s; @@ -534,7 +538,7 @@ StringPair extractString(Token t, String encoding, boolean unicodeLiterals) { // Plain unicode: already decoded, just handle escapes string = PyString.decode_UnicodeEscape(string, start, end, "strict", ustring); } - return new StringPair(string, ustring); + return new StringPair(string, ustring, encoding); } Token extractStringToken(List s) { diff --git a/src/org/python/core/PyString.java b/src/org/python/core/PyString.java index 1f9700cfd..d884543c3 100644 --- a/src/org/python/core/PyString.java +++ b/src/org/python/core/PyString.java @@ -114,10 +114,10 @@ public PyString(char c) { * 2<sup>width</sup>-1. We use this to test for "byte-like" or ASCII.
* * @param s string to test - * @param width number of bits within which each character must fit (<16) + * @param width number of bits within which each character must fit (&lt;16) * @return true if and only if every character has a code less than 2^width */ - static boolean charsFitWidth(String s, int width) { + public static boolean charsFitWidth(String s, int width) { final int N = s.length(); diff --git a/tests/java/org/python/core/JavaLangStringConstructor.java b/tests/java/org/python/core/JavaLangStringConstructor.java new file mode 100644 index 000000000..b28720dd0 --- /dev/null +++ b/tests/java/org/python/core/JavaLangStringConstructor.java @@ -0,0 +1,14 @@ +package org.python.core; + +public final class JavaLangStringConstructor { + + private final String constructedValue; + + public JavaLangStringConstructor(String value) { + this.constructedValue = value; + } + + public String getConstructedValue() { + return constructedValue; + } +} diff --git a/tests/java/org/python/core/JavaLangStringProvider.java b/tests/java/org/python/core/JavaLangStringProvider.java new file mode 100644 index 000000000..18b6bb260 --- /dev/null +++ b/tests/java/org/python/core/JavaLangStringProvider.java @@ -0,0 +1,90 @@ +package org.python.core; + +public final class JavaLangStringProvider { + + private static final String SMALL_O_UMLAUT = "\u00F6"; + private static final String RIGHT_SINGLE_QUOTATION_MARK = "\u2019"; + + private static final String BEAUTIFUL = "sch" + SMALL_O_UMLAUT + "n"; + private static final String START_OF_BEAUTIFUL = "sch" + SMALL_O_UMLAUT; + private static final String END_OF_BEAUTIFUL = SMALL_O_UMLAUT + "n"; + + private static final String JEANNE_DARC = "Jeanne d" + RIGHT_SINGLE_QUOTATION_MARK + "Arc"; + private static final String START_OF_JEANNE_DARC = "Jeanne d" + RIGHT_SINGLE_QUOTATION_MARK + "A"; + private static final String END_OF_JEANNE_DARC = "d" + RIGHT_SINGLE_QUOTATION_MARK + "Arc"; + + private static final String BEAUTIFUL_JEANNE_DARC = BEAUTIFUL
+ "e" + JEANNE_DARC; + + /** + * Provides a single small o umlaut + */ + public static final String getSmallOUmlaut() { + return SMALL_O_UMLAUT; + } + + /** + * Provides the word 'beautiful' in German, using a small o umlaut + */ + public static final String getBeautiful() { + return BEAUTIFUL; + } + + /** + * Provides the start of 'beautiful' in German, using a small o umlaut + */ + public static final String getStartOfBeautiful() { + return START_OF_BEAUTIFUL; + } + + /** + * Provides the end of 'beautiful' in German, using a small o umlaut + */ + public static final String getEndOfBeautiful() { + return END_OF_BEAUTIFUL; + } + + /** + * Provides the word 'more beautiful' in German, using a small o umlaut + */ + public static final String getMoreBeautiful() { + return BEAUTIFUL + "er"; + } + + /** + * Provides the right single quotation mark + * + * @see "https://www.compart.com/en/unicode/U+2019" + */ + public static final String getRightSingleQuotationMark() { + return RIGHT_SINGLE_QUOTATION_MARK; + } + + /** + * Provides the name of Jeanne d'Arc, but using a right single quotation mark as apostrophe + */ + public static final String getJeanneDArc() { + return JEANNE_DARC; + } + + /** + * Provides the start of Jeanne d'Arc, including a right single quotation mark as apostrophe + */ + public static final String getStartOfJeanneDArc() { + return START_OF_JEANNE_DARC; + } + + /** + * Provides the end of Jeanne d'Arc, including a right single quotation mark as apostrophe + */ + public static final String getEndOfJeanneDArc() { + return END_OF_JEANNE_DARC; + } + + /** + * Provides beautiful Jeanne d'Arc, a mixture of Umlaut and Unicode + */ + public static final String getBeautifulJeanneDArc() { + return BEAUTIFUL_JEANNE_DARC; + } + +} diff --git a/tests/java/org/python/core/PyUnicodeComparisonTest.java b/tests/java/org/python/core/PyUnicodeComparisonTest.java new file mode 100644 index 000000000..f200e4cd9 --- /dev/null +++ 
b/tests/java/org/python/core/PyUnicodeComparisonTest.java @@ -0,0 +1,69 @@ +package org.python.core; + +import static java.lang.String.format; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; +import static org.python.core.JavaLangStringProvider.getBeautiful; + +import org.junit.Test; +import org.python.util.PythonInterpreter; + +public class PyUnicodeComparisonTest { + + // variable name + private static final String RESULT = "result"; + + private String compare() { + StringBuffer b = new StringBuffer(); + b.append("from org.python.core import JavaLangStringProvider\n"); + b.append("value = JavaLangStringProvider.getBeautiful()\n"); + b.append("result = False\n"); + b.append("if value == '%s':\n"); + b.append(" result = True\n"); + return format(b.toString(), getBeautiful()); + } + + + @Test + public void testCompare() { + try (PythonInterpreter interpreter = new PythonInterpreter()) { + interpreter.exec(compare()); + assertResultBoolean(true, interpreter); + } + } + + private String compareConstructed() { + StringBuffer b = new StringBuffer(); + b.append("from org.python.core import JavaLangStringConstructor\n"); + b.append("value = JavaLangStringConstructor('%s').getConstructedValue()\n"); + b.append("result = False\n"); + b.append("if value == '%s':\n"); + b.append(" result = True\n"); + return format(b.toString(), getBeautiful(), getBeautiful()); + } + + @Test + public void testCompareConstructed() { + try (PythonInterpreter interpreter = new PythonInterpreter()) { + interpreter.exec(compareConstructed()); + assertResultBoolean(true, interpreter); + } + } + + + private void assertResultBoolean(boolean expected, PythonInterpreter interpreter) { + Object resultObject = interpreter.get(RESULT); + if (resultObject instanceof PyBoolean) { + PyBoolean result = (PyBoolean) resultObject; + if (expected) { + assertTrue("expected result to be True, but was False", result.getBooleanValue()); + 
} else { + assertFalse("expected result to be False, but was True", result.getBooleanValue()); + } + } else { + fail("expected result to be PyBoolean but was " + resultObject.getClass().getName()); + } + } + +} diff --git a/tests/java/org/python/core/PyUnicodeReplaceTest.java b/tests/java/org/python/core/PyUnicodeReplaceTest.java new file mode 100644 index 000000000..bec4d0f5f --- /dev/null +++ b/tests/java/org/python/core/PyUnicodeReplaceTest.java @@ -0,0 +1,41 @@ +package org.python.core; + +import static java.lang.String.format; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; +import static org.python.core.JavaLangStringProvider.getSmallOUmlaut; + +import org.junit.Test; +import org.python.util.PythonInterpreter; + +public class PyUnicodeReplaceTest { + + // variable name + private static final String RESULT = "result"; + + private String replace() { + StringBuffer b = new StringBuffer(); + b.append("from org.python.core import JavaLangStringProvider\n"); + b.append("result = JavaLangStringProvider.getBeautiful().replace('%s', 'oe')\n"); + return format(b.toString(), getSmallOUmlaut()); + } + + @Test + public void testReplace() { + try (PythonInterpreter interpreter = new PythonInterpreter()) { + interpreter.exec(replace()); + assertResultEquals("schoen", interpreter); + } + } + + private void assertResultEquals(String expected, PythonInterpreter interpreter) { + Object resultObject = interpreter.get(RESULT); + if (resultObject instanceof PyString) { + PyString result = (PyString) resultObject; + assertEquals(expected, result.getString()); + } else { + fail("expected result to be PyString but was " + resultObject.getClass().getName()); + } + } + +}