diff --git a/Lib/test/test_struct.py b/Lib/test/test_struct.py index e3e02097b1f550..c7dc69defded50 100644 --- a/Lib/test/test_struct.py +++ b/Lib/test/test_struct.py @@ -605,7 +605,7 @@ def test_Struct_reinitialization(self): self.assertEqual(s.unpack(packed), (1, 2)) with self.assertWarnsRegex(FutureWarning, msg): - with self.assertRaises(UnicodeEncodeError): + with self.assertRaises(ValueError): s.__init__('\udc00') self.assertEqual(s.format, '>hh') self.assertEqual(s.pack(1, 2), packed) @@ -872,10 +872,10 @@ def __init__(self, *args, **kwargs): with self.assertWarnsRegex(DeprecationWarning, warnmsg + 'bad char'): my_struct = MyStruct(format='$') self.assertEqual(my_struct.pack(12345), b'\x30\x39') - with self.assertWarnsRegex(DeprecationWarning, warnmsg + ".*can't encode"): + with self.assertWarnsRegex(DeprecationWarning, warnmsg + "non-ASCII"): my_struct = MyStruct('\udc00') self.assertEqual(my_struct.pack(12345), b'\x30\x39') - with self.assertWarnsRegex(DeprecationWarning, warnmsg + ".*can't encode"): + with self.assertWarnsRegex(DeprecationWarning, warnmsg + "non-ASCII"): my_struct = MyStruct(format='\udc00') self.assertEqual(my_struct.pack(12345), b'\x30\x39') @@ -928,11 +928,16 @@ def __init__(self, newargs, initargs): with self.assertWarns(FutureWarning): with self.assertRaises(struct.error): MyStruct(('>h',), ('$',)) - with self.assertRaises(UnicodeEncodeError): + with self.assertRaises(ValueError): MyStruct(('\udc00',), ('>h',)) + with self.assertRaises(ValueError): + MyStruct((b'\xa4',), ('>h',)) with self.assertWarns(FutureWarning): - with self.assertRaises(UnicodeEncodeError): + with self.assertRaises(ValueError): MyStruct(('>h',), ('\udc00',)) + with self.assertWarns(FutureWarning): + with self.assertRaises(ValueError): + MyStruct(('>h',), (b'\xa4',)) with self.assertWarns(FutureWarning): my_struct = MyStruct(('>h',), ('h', 42) with self.assertRaises(TypeError): @@ -1004,7 +1011,7 @@ def test_operations_on_half_initialized_Struct(self): self.assertRaises(RuntimeError, S.pack_into, spam, 1) self.assertRaises(RuntimeError, S.unpack, spam) self.assertRaises(RuntimeError, S.unpack_from, spam) - self.assertRaises(RuntimeError, getattr, S, 'format') + self.assertRaises(AttributeError, getattr, S, 'format') self.assertRaises(RuntimeError, S.__sizeof__) self.assertRaises(RuntimeError, repr, S) self.assertEqual(S.size, -1) diff --git a/Misc/NEWS.d/next/Library/2026-03-12-12-17-39.gh-issue-145850.uW3stt.rst b/Misc/NEWS.d/next/Library/2026-03-12-12-17-39.gh-issue-145850.uW3stt.rst new file mode 100644 index 00000000000000..35ba57a95b0e7e --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-03-12-12-17-39.gh-issue-145850.uW3stt.rst @@ -0,0 +1,6 @@ +Changed some implementation details in :class:`struct.Struct`: calling it +with non-ASCII string format will now raise a :exc:`ValueError` instead of +:exc:`UnicodeEncodeError`, calling it with non-ASCII bytes format will now +raise a :exc:`ValueError` instead of :exc:`struct.error`, getting +the :attr:`!format` attribute of uninitialized object will now raise an +:exc:`AttributeError` instead of :exc:`RuntimeError`. diff --git a/Modules/_struct.c b/Modules/_struct.c index 7eddc9bdc38a89..2059218029ea34 100644 --- a/Modules/_struct.c +++ b/Modules/_struct.c @@ -1635,8 +1635,12 @@ prepare_s(PyStructObject *self, PyObject *format) _structmodulestate *state = get_struct_state_structinst(self); - fmt = PyBytes_AS_STRING(format); - if (strlen(fmt) != (size_t)PyBytes_GET_SIZE(format)) { + if (!PyUnicode_IS_ASCII(format)) { + PyErr_SetString(PyExc_ValueError, "non-ASCII character in struct format"); + return -1; + } + fmt = (const char *)PyUnicode_1BYTE_DATA(format); + if (strlen(fmt) != (size_t)PyUnicode_GET_LENGTH(format)) { PyErr_SetString(state->StructError, "embedded null character"); return -1; @@ -1780,12 +1784,11 @@ static int set_format(PyStructObject *self, PyObject *format) { if (PyUnicode_Check(format)) { - format = PyUnicode_AsASCIIString(format); - if (format == NULL) - return -1; + format = PyUnicode_FromObject(format); } else if (PyBytes_Check(format)) { - Py_INCREF(format); + format = PyUnicode_DecodeASCII(PyBytes_AS_STRING(format), + PyBytes_GET_SIZE(format), "surrogateescape"); } else { PyErr_Format(PyExc_TypeError, @@ -1793,6 +1796,9 @@ set_format(PyStructObject *self, PyObject *format) "not %T", format); return -1; } + if (format == NULL) { + return -1; + } if (prepare_s(self, format)) { Py_DECREF(format); return -1; @@ -1821,7 +1827,7 @@ Struct_impl(PyTypeObject *type, PyObject *format) if (self == NULL) { return NULL; } - self->s_format = Py_NewRef(Py_None); + self->s_format = NULL; self->s_codes = NULL; self->s_size = -1; self->s_len = -1; @@ -1878,7 +1884,7 @@ s_new(PyTypeObject *type, PyObject *args, PyObject *kwds) if (self == NULL) { return NULL; } - self->s_format = Py_NewRef(Py_None); + self->s_format = NULL; self->s_codes = NULL; self->s_size = -1; self->s_len = -1; @@ -1892,7 +1898,7 @@ s_new(PyTypeObject *type, PyObject *args, PyObject *kwds) return NULL; } PyObject *exc = PyErr_GetRaisedException(); - Py_SETREF(self->s_format, Py_NewRef(Py_None)); + Py_CLEAR(self->s_format); if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, "Invalid 'format' argument for Struct.__new__(): %S", exc)) { @@ -1910,8 +1916,8 @@ s_new(PyTypeObject *type, PyObject *args, PyObject *kwds) static bool same_format(PyStructObject *s, PyObject *format) { - Py_ssize_t size = PyBytes_GET_SIZE(s->s_format); - const void *data = PyBytes_AS_STRING(s->s_format); + Py_ssize_t size = PyUnicode_GET_LENGTH(s->s_format); + const void *data = PyUnicode_1BYTE_DATA(s->s_format); if (PyUnicode_Check(format) && PyUnicode_IS_ASCII(format)) { return PyUnicode_GET_LENGTH(format) == size && memcmp(PyUnicode_1BYTE_DATA(format), data, size) == 0; @@ -1938,7 +1944,7 @@ static int Struct___init___impl(PyStructObject *self, PyObject *format) /*[clinic end generated code: output=b8e80862444e92d0 input=1af78a5f57d82cec]*/ { - if (self->s_format == Py_None) { + if (self->s_format == NULL) { if (set_format(self, format) < 0) { return -1; } @@ -1965,7 +1971,7 @@ s_init(PyObject *self, PyObject *args, PyObject *kwargs) { if (!((PyStructObject *)self)->init_called && Py_TYPE(self)->tp_init == s_init - && ((PyStructObject *)self)->s_format != Py_None) + && ((PyStructObject *)self)->s_format != NULL) { /* Struct.__init__() was called implicitly. * __new__() already did all the work. */ @@ -2508,22 +2514,6 @@ Struct_pack_into_impl(PyStructObject *self, Py_buffer *buffer, Py_RETURN_NONE; } -static PyObject * -s_get_format(PyObject *op, void *Py_UNUSED(closure)) -{ - PyStructObject *self = PyStructObject_CAST(op); - ENSURE_STRUCT_IS_READY(self); - return PyUnicode_FromStringAndSize(PyBytes_AS_STRING(self->s_format), - PyBytes_GET_SIZE(self->s_format)); -} - -static PyObject * -s_get_size(PyObject *op, void *Py_UNUSED(closure)) -{ - PyStructObject *self = PyStructObject_CAST(op); - return PyLong_FromSsize_t(self->s_size); -} - /*[clinic input] Struct.__sizeof__ [clinic start generated code]*/ @@ -2545,14 +2535,7 @@ s_repr(PyObject *op) { PyStructObject *self = PyStructObject_CAST(op); ENSURE_STRUCT_IS_READY(self); - PyObject* fmt = PyUnicode_FromStringAndSize( - PyBytes_AS_STRING(self->s_format), PyBytes_GET_SIZE(self->s_format)); - if (fmt == NULL) { - return NULL; - } - PyObject* s = PyUnicode_FromFormat("%s(%R)", _PyType_Name(Py_TYPE(self)), fmt); - Py_DECREF(fmt); - return s; + return PyUnicode_FromFormat("%s(%R)", _PyType_Name(Py_TYPE(self)), self->s_format); } /* List of functions */ @@ -2569,15 +2552,13 @@ static struct PyMethodDef s_methods[] = { static PyMemberDef s_members[] = { {"__weaklistoffset__", Py_T_PYSSIZET, offsetof(PyStructObject, weakreflist), Py_READONLY}, + {"format", Py_T_OBJECT_EX, offsetof(PyStructObject, s_format), + Py_READONLY, PyDoc_STR("struct format string")}, + {"size", Py_T_PYSSIZET, offsetof(PyStructObject, s_size), Py_READONLY, + PyDoc_STR("struct size in bytes")}, {NULL} /* sentinel */ }; -static PyGetSetDef s_getsetlist[] = { - {"format", s_get_format, NULL, PyDoc_STR("struct format string"), NULL}, - {"size", s_get_size, NULL, PyDoc_STR("struct size in bytes"), NULL}, - {NULL} /* sentinel */ -}; - static PyType_Slot PyStructType_slots[] = { {Py_tp_dealloc, s_dealloc}, {Py_tp_getattro, PyObject_GenericGetAttr}, @@ -2588,7 +2569,6 @@ static PyType_Slot PyStructType_slots[] = { {Py_tp_clear, s_clear}, {Py_tp_methods, s_methods}, {Py_tp_members, s_members}, - {Py_tp_getset, s_getsetlist}, {Py_tp_init, s_init}, {Py_tp_new, s_new}, {0, 0}, diff --git a/Modules/_xxtestfuzz/fuzzer.c b/Modules/_xxtestfuzz/fuzzer.c index f3a22f3f6a87cb..6cb11562476e40 100644 --- a/Modules/_xxtestfuzz/fuzzer.c +++ b/Modules/_xxtestfuzz/fuzzer.c @@ -133,6 +133,10 @@ static int fuzz_struct_unpack(const char* data, size_t size) { if (unpacked == NULL && PyErr_ExceptionMatches(PyExc_SystemError)) { PyErr_Clear(); } + /* Ignore any ValueError, these are triggered by non-ASCII format. */ + if (unpacked == NULL && PyErr_ExceptionMatches(PyExc_ValueError)) { + PyErr_Clear(); + } /* Ignore any struct.error exceptions, these can be caused by invalid formats or incomplete buffers both of which are common. */ if (unpacked == NULL && PyErr_ExceptionMatches(struct_error)) {