Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 14 additions & 7 deletions Lib/test/test_struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -605,7 +605,7 @@ def test_Struct_reinitialization(self):
self.assertEqual(s.unpack(packed), (1, 2))

with self.assertWarnsRegex(FutureWarning, msg):
with self.assertRaises(UnicodeEncodeError):
with self.assertRaises(ValueError):
s.__init__('\udc00')
self.assertEqual(s.format, '>hh')
self.assertEqual(s.pack(1, 2), packed)
Expand Down Expand Up @@ -872,10 +872,10 @@ def __init__(self, *args, **kwargs):
with self.assertWarnsRegex(DeprecationWarning, warnmsg + 'bad char'):
my_struct = MyStruct(format='$')
self.assertEqual(my_struct.pack(12345), b'\x30\x39')
with self.assertWarnsRegex(DeprecationWarning, warnmsg + ".*can't encode"):
with self.assertWarnsRegex(DeprecationWarning, warnmsg + "non-ASCII"):
my_struct = MyStruct('\udc00')
self.assertEqual(my_struct.pack(12345), b'\x30\x39')
with self.assertWarnsRegex(DeprecationWarning, warnmsg + ".*can't encode"):
with self.assertWarnsRegex(DeprecationWarning, warnmsg + "non-ASCII"):
my_struct = MyStruct(format='\udc00')
self.assertEqual(my_struct.pack(12345), b'\x30\x39')

Expand Down Expand Up @@ -928,11 +928,16 @@ def __init__(self, newargs, initargs):
with self.assertWarns(FutureWarning):
with self.assertRaises(struct.error):
MyStruct(('>h',), ('$',))
with self.assertRaises(UnicodeEncodeError):
with self.assertRaises(ValueError):
MyStruct(('\udc00',), ('>h',))
with self.assertRaises(ValueError):
MyStruct((b'\xa4',), ('>h',))
with self.assertWarns(FutureWarning):
with self.assertRaises(UnicodeEncodeError):
with self.assertRaises(ValueError):
MyStruct(('>h',), ('\udc00',))
with self.assertWarns(FutureWarning):
with self.assertRaises(ValueError):
MyStruct(('>h',), (b'\xa4',))
with self.assertWarns(FutureWarning):
my_struct = MyStruct(('>h',), ('<h',))
self.assertEqual(my_struct.format, '<h')
Expand All @@ -954,8 +959,10 @@ class MyStruct(struct.Struct):
MyStruct(42)
with self.assertRaises(struct.error):
MyStruct('$')
with self.assertRaises(UnicodeEncodeError):
with self.assertRaises(ValueError):
MyStruct('\udc00')
with self.assertRaises(ValueError):
MyStruct(b'\xa4')
with self.assertRaises(TypeError):
MyStruct('>h', 42)
with self.assertRaises(TypeError):
Expand Down Expand Up @@ -1004,7 +1011,7 @@ def test_operations_on_half_initialized_Struct(self):
self.assertRaises(RuntimeError, S.pack_into, spam, 1)
self.assertRaises(RuntimeError, S.unpack, spam)
self.assertRaises(RuntimeError, S.unpack_from, spam)
self.assertRaises(RuntimeError, getattr, S, 'format')
self.assertRaises(AttributeError, getattr, S, 'format')
self.assertRaises(RuntimeError, S.__sizeof__)
self.assertRaises(RuntimeError, repr, S)
self.assertEqual(S.size, -1)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
Changed some implementation details in :class:`struct.Struct`: calling it
with non-ASCII string format will now raise a :exc:`ValueError` instead of
:exc:`UnicodeEncodeError`, calling it with non-ASCII bytes format will now
raise a :exc:`ValueError` instead of :exc:`struct.error`, getting
the :attr:`!format` attribute of uninitialized object will now raise an
:exc:`AttributeError` instead of :exc:`RuntimeError`.
68 changes: 24 additions & 44 deletions Modules/_struct.c
Original file line number Diff line number Diff line change
Expand Up @@ -1635,8 +1635,12 @@ prepare_s(PyStructObject *self, PyObject *format)

_structmodulestate *state = get_struct_state_structinst(self);

fmt = PyBytes_AS_STRING(format);
if (strlen(fmt) != (size_t)PyBytes_GET_SIZE(format)) {
if (!PyUnicode_IS_ASCII(format)) {
PyErr_SetString(PyExc_ValueError, "non-ASCII character in struct format");
return -1;
}
fmt = (const char *)PyUnicode_1BYTE_DATA(format);
if (strlen(fmt) != (size_t)PyUnicode_GET_LENGTH(format)) {
PyErr_SetString(state->StructError,
"embedded null character");
return -1;
Expand Down Expand Up @@ -1780,19 +1784,21 @@ static int
set_format(PyStructObject *self, PyObject *format)
{
if (PyUnicode_Check(format)) {
format = PyUnicode_AsASCIIString(format);
if (format == NULL)
return -1;
format = PyUnicode_FromObject(format);
}
else if (PyBytes_Check(format)) {
Py_INCREF(format);
format = PyUnicode_DecodeASCII(PyBytes_AS_STRING(format),
PyBytes_GET_SIZE(format), "surrogateescape");
}
else {
PyErr_Format(PyExc_TypeError,
"Struct() argument 1 must be a str or bytes object, "
"not %T", format);
return -1;
}
if (format == NULL) {
return -1;
}
if (prepare_s(self, format)) {
Py_DECREF(format);
return -1;
Expand Down Expand Up @@ -1821,7 +1827,7 @@ Struct_impl(PyTypeObject *type, PyObject *format)
if (self == NULL) {
return NULL;
}
self->s_format = Py_NewRef(Py_None);
self->s_format = NULL;
self->s_codes = NULL;
self->s_size = -1;
self->s_len = -1;
Expand Down Expand Up @@ -1878,7 +1884,7 @@ s_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
if (self == NULL) {
return NULL;
}
self->s_format = Py_NewRef(Py_None);
self->s_format = NULL;
self->s_codes = NULL;
self->s_size = -1;
self->s_len = -1;
Expand All @@ -1892,7 +1898,7 @@ s_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
return NULL;
}
PyObject *exc = PyErr_GetRaisedException();
Py_SETREF(self->s_format, Py_NewRef(Py_None));
Py_CLEAR(self->s_format);
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
"Invalid 'format' argument for Struct.__new__(): %S", exc))
{
Expand All @@ -1910,8 +1916,8 @@ s_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
static bool
same_format(PyStructObject *s, PyObject *format)
{
Py_ssize_t size = PyBytes_GET_SIZE(s->s_format);
const void *data = PyBytes_AS_STRING(s->s_format);
Py_ssize_t size = PyUnicode_GET_LENGTH(s->s_format);
const void *data = PyUnicode_1BYTE_DATA(s->s_format);
if (PyUnicode_Check(format) && PyUnicode_IS_ASCII(format)) {
return PyUnicode_GET_LENGTH(format) == size
&& memcmp(PyUnicode_1BYTE_DATA(format), data, size) == 0;
Expand All @@ -1938,7 +1944,7 @@ static int
Struct___init___impl(PyStructObject *self, PyObject *format)
/*[clinic end generated code: output=b8e80862444e92d0 input=1af78a5f57d82cec]*/
{
if (self->s_format == Py_None) {
if (self->s_format == NULL) {
if (set_format(self, format) < 0) {
return -1;
}
Expand All @@ -1965,7 +1971,7 @@ s_init(PyObject *self, PyObject *args, PyObject *kwargs)
{
if (!((PyStructObject *)self)->init_called
&& Py_TYPE(self)->tp_init == s_init
&& ((PyStructObject *)self)->s_format != Py_None)
&& ((PyStructObject *)self)->s_format != NULL)
{
/* Struct.__init__() was called implicitly.
* __new__() already did all the work. */
Expand Down Expand Up @@ -2508,22 +2514,6 @@ Struct_pack_into_impl(PyStructObject *self, Py_buffer *buffer,
Py_RETURN_NONE;
}

static PyObject *
s_get_format(PyObject *op, void *Py_UNUSED(closure))
{
PyStructObject *self = PyStructObject_CAST(op);
ENSURE_STRUCT_IS_READY(self);
return PyUnicode_FromStringAndSize(PyBytes_AS_STRING(self->s_format),
PyBytes_GET_SIZE(self->s_format));
}

static PyObject *
s_get_size(PyObject *op, void *Py_UNUSED(closure))
{
PyStructObject *self = PyStructObject_CAST(op);
return PyLong_FromSsize_t(self->s_size);
}

/*[clinic input]
Struct.__sizeof__
[clinic start generated code]*/
Expand All @@ -2545,14 +2535,7 @@ s_repr(PyObject *op)
{
PyStructObject *self = PyStructObject_CAST(op);
ENSURE_STRUCT_IS_READY(self);
PyObject* fmt = PyUnicode_FromStringAndSize(
PyBytes_AS_STRING(self->s_format), PyBytes_GET_SIZE(self->s_format));
if (fmt == NULL) {
return NULL;
}
PyObject* s = PyUnicode_FromFormat("%s(%R)", _PyType_Name(Py_TYPE(self)), fmt);
Py_DECREF(fmt);
return s;
return PyUnicode_FromFormat("%s(%R)", _PyType_Name(Py_TYPE(self)), self->s_format);
}

/* List of functions */
Expand All @@ -2569,15 +2552,13 @@ static struct PyMethodDef s_methods[] = {

static PyMemberDef s_members[] = {
{"__weaklistoffset__", Py_T_PYSSIZET, offsetof(PyStructObject, weakreflist), Py_READONLY},
{"format", Py_T_OBJECT_EX, offsetof(PyStructObject, s_format),
Py_READONLY, PyDoc_STR("struct format string")},
{"size", Py_T_PYSSIZET, offsetof(PyStructObject, s_size), Py_READONLY,
PyDoc_STR("struct size in bytes")},
{NULL} /* sentinel */
};

static PyGetSetDef s_getsetlist[] = {
{"format", s_get_format, NULL, PyDoc_STR("struct format string"), NULL},
{"size", s_get_size, NULL, PyDoc_STR("struct size in bytes"), NULL},
{NULL} /* sentinel */
};

static PyType_Slot PyStructType_slots[] = {
{Py_tp_dealloc, s_dealloc},
{Py_tp_getattro, PyObject_GenericGetAttr},
Expand All @@ -2588,7 +2569,6 @@ static PyType_Slot PyStructType_slots[] = {
{Py_tp_clear, s_clear},
{Py_tp_methods, s_methods},
{Py_tp_members, s_members},
{Py_tp_getset, s_getsetlist},
{Py_tp_init, s_init},
{Py_tp_new, s_new},
{0, 0},
Expand Down
4 changes: 4 additions & 0 deletions Modules/_xxtestfuzz/fuzzer.c
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,10 @@ static int fuzz_struct_unpack(const char* data, size_t size) {
if (unpacked == NULL && PyErr_ExceptionMatches(PyExc_SystemError)) {
PyErr_Clear();
}
/* Ignore any ValueError, these are triggered by non-ASCII format. */
if (unpacked == NULL && PyErr_ExceptionMatches(PyExc_ValueError)) {
PyErr_Clear();
}
/* Ignore any struct.error exceptions, these can be caused by invalid
formats or incomplete buffers both of which are common. */
if (unpacked == NULL && PyErr_ExceptionMatches(struct_error)) {
Expand Down
Loading