diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index c2c508c1a71c5c..e8ae21b8746e1d 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -1039,6 +1039,35 @@ static inline Py_ALWAYS_INLINE void _Py_INCREF_MORTAL(PyObject *op) * references. */ PyAPI_FUNC(int) _PyObject_VisitType(PyObject *op, visitproc visit, void *arg); +// Pointer-by-pointer memmove for PyObject** arrays that is safe for shared +// objects in Py_GIL_DISABLED builds. Locking is the caller's responsibility. +static inline void +_Py_ptr_wise_atomic_memmove(PyObject *a, PyObject **dest, PyObject **src, + Py_ssize_t n) +{ +#ifndef Py_GIL_DISABLED + (void)a; + memmove(dest, src, n * sizeof(PyObject *)); +#else + if (_Py_IsOwnedByCurrentThread(a) && !_PyObject_GC_IS_SHARED(a)) { + // No other threads can read this object's array concurrently + memmove(dest, src, n * sizeof(PyObject *)); + return; + } + if (dest < src) { + for (Py_ssize_t i = 0; i != n; i++) { + _Py_atomic_store_ptr_release(&dest[i], src[i]); + } + } + else { + // copy backwards to avoid overwriting src before it's read + for (Py_ssize_t i = n; i != 0; i--) { + _Py_atomic_store_ptr_release(&dest[i - 1], src[i - 1]); + } + } +#endif +} + #ifdef __cplusplus } #endif diff --git a/Modules/Setup.stdlib.in b/Modules/Setup.stdlib.in index 8efea27824f0e8..db81e482405b9d 100644 --- a/Modules/Setup.stdlib.in +++ b/Modules/Setup.stdlib.in @@ -172,7 +172,7 @@ @MODULE_XXSUBTYPE_TRUE@xxsubtype xxsubtype.c @MODULE__XXTESTFUZZ_TRUE@_xxtestfuzz _xxtestfuzz/_xxtestfuzz.c _xxtestfuzz/fuzzer.c @MODULE__TESTBUFFER_TRUE@_testbuffer _testbuffer.c -@MODULE__TESTINTERNALCAPI_TRUE@_testinternalcapi _testinternalcapi.c _testinternalcapi/test_lock.c _testinternalcapi/pytime.c _testinternalcapi/set.c _testinternalcapi/test_critical_sections.c _testinternalcapi/complex.c _testinternalcapi/interpreter.c _testinternalcapi/tuple.c +@MODULE__TESTINTERNALCAPI_TRUE@_testinternalcapi _testinternalcapi.c _testinternalcapi/test_lock.c _testinternalcapi/pytime.c _testinternalcapi/test_ptr_wise_memmove.c _testinternalcapi/set.c _testinternalcapi/test_critical_sections.c _testinternalcapi/complex.c _testinternalcapi/interpreter.c _testinternalcapi/tuple.c @MODULE__TESTCAPI_TRUE@_testcapi _testcapimodule.c _testcapi/vectorcall.c _testcapi/heaptype.c _testcapi/abstract.c _testcapi/unicode.c _testcapi/dict.c _testcapi/set.c _testcapi/list.c _testcapi/tuple.c _testcapi/getargs.c _testcapi/datetime.c _testcapi/docstring.c _testcapi/mem.c _testcapi/watchers.c _testcapi/long.c _testcapi/float.c _testcapi/complex.c _testcapi/numbers.c _testcapi/structmember.c _testcapi/exceptions.c _testcapi/code.c _testcapi/buffer.c _testcapi/pyatomic.c _testcapi/run.c _testcapi/file.c _testcapi/codec.c _testcapi/immortal.c _testcapi/gc.c _testcapi/hash.c _testcapi/time.c _testcapi/bytes.c _testcapi/object.c _testcapi/modsupport.c _testcapi/monitoring.c _testcapi/config.c _testcapi/import.c _testcapi/frame.c _testcapi/type.c _testcapi/function.c _testcapi/module.c _testcapi/weakref.c @MODULE__TESTLIMITEDCAPI_TRUE@_testlimitedcapi _testlimitedcapi.c _testlimitedcapi/abstract.c _testlimitedcapi/bytearray.c _testlimitedcapi/bytes.c _testlimitedcapi/codec.c _testlimitedcapi/complex.c _testlimitedcapi/dict.c _testlimitedcapi/eval.c _testlimitedcapi/float.c _testlimitedcapi/heaptype_relative.c _testlimitedcapi/import.c _testlimitedcapi/list.c _testlimitedcapi/long.c _testlimitedcapi/object.c _testlimitedcapi/pyos.c _testlimitedcapi/set.c _testlimitedcapi/slots.c _testlimitedcapi/sys.c _testlimitedcapi/threadstate.c _testlimitedcapi/tuple.c _testlimitedcapi/unicode.c _testlimitedcapi/vectorcall_limited.c _testlimitedcapi/version.c _testlimitedcapi/file.c _testlimitedcapi/weakref.c @MODULE__TESTCLINIC_TRUE@_testclinic _testclinic.c diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index f827274eeffba8..cfe7dbf7f17039 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -18,6 +18,7 @@ #include "Python.h" #include "pycore_ceval.h" // _Py_EnterRecursiveCall() #include "pycore_dict.h" // _PyDict_CopyAsDict() +#include "pycore_object.h" // _Py_ptr_wise_atomic_memmove() #include "pycore_pyhash.h" // _Py_HashSecret #include "pycore_tuple.h" // _PyTuple_FromPair #include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() @@ -1863,6 +1864,7 @@ element_subscr(PyObject *op, PyObject *item) } } + static int element_ass_subscr(PyObject *op, PyObject *item, PyObject *value) { @@ -1938,19 +1940,21 @@ element_ass_subscr(PyObject *op, PyObject *item, PyObject *value) PyList_SET_ITEM(recycle, i, self->extra->children[cur]); - memmove( + _Py_ptr_wise_atomic_memmove( + (PyObject *)self, self->extra->children + cur - i, self->extra->children + cur + 1, - num_moved * sizeof(PyObject *)); + num_moved); } /* Leftover "tail" after the last removed child */ cur = start + (size_t)slicelen * step; if (cur < (size_t)self->extra->length) { - memmove( + _Py_ptr_wise_atomic_memmove( + (PyObject *)self, self->extra->children + cur - slicelen, self->extra->children + cur, - (self->extra->length - cur) * sizeof(PyObject *)); + self->extra->length - cur); } self->extra->length -= slicelen; diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index d0d1f1f1bc8e53..ad6869e17b08db 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -3340,6 +3340,9 @@ module_exec(PyObject *module) if (_PyTestInternalCapi_Init_PyTime(module) < 0) { return 1; } + if (_PyTestInternalCapi_Init_PtrWiseMemmove(module) < 0) { + return 1; + } if (_PyTestInternalCapi_Init_Set(module) < 0) { return 1; } diff --git a/Modules/_testinternalcapi/parts.h b/Modules/_testinternalcapi/parts.h index 81f536c3babb18..398eb0a0ffb7fd 100644 --- a/Modules/_testinternalcapi/parts.h +++ b/Modules/_testinternalcapi/parts.h @@ -12,6 +12,7 @@ int _PyTestInternalCapi_Init_Lock(PyObject *module); int _PyTestInternalCapi_Init_PyTime(PyObject *module); +int _PyTestInternalCapi_Init_PtrWiseMemmove(PyObject *module); int _PyTestInternalCapi_Init_Set(PyObject *module); int _PyTestInternalCapi_Init_Complex(PyObject *module); int _PyTestInternalCapi_Init_CriticalSection(PyObject *module); diff --git a/Modules/_testinternalcapi/test_ptr_wise_memmove.c b/Modules/_testinternalcapi/test_ptr_wise_memmove.c new file mode 100644 index 00000000000000..e11ff09a48b0ed --- /dev/null +++ b/Modules/_testinternalcapi/test_ptr_wise_memmove.c @@ -0,0 +1,212 @@ +// Tests for _Py_ptr_wise_atomic_memmove() in pycore_object.h + +#include "parts.h" +#include "pycore_object.h" // _Py_ptr_wise_atomic_memmove() +#include "pycore_gc.h" // _PyObject_GC_SET_SHARED() + +// Five distinguishable immortal singletons used as placeholder pointers. +// These require no reference-count management when stored in a raw array. +#define NPTRS 5 +static PyObject *test_objs[NPTRS]; + +static void +setup_test_objs(void) +{ + test_objs[0] = Py_None; + test_objs[1] = Py_True; + test_objs[2] = Py_False; + test_objs[3] = Py_Ellipsis; + test_objs[4] = Py_NotImplemented; +} + +// Fill buf[0..NPTRS-1] with test_objs in order. +static void +fill_buf(PyObject **buf) +{ + for (int i = 0; i < NPTRS; i++) { + buf[i] = test_objs[i]; + } +} + +// Return a fresh empty PyListObject whose GC SHARED bit is set in +// Py_GIL_DISABLED builds. This forces _Py_ptr_wise_atomic_memmove() +// to take the atomic (non-fast) path so we can exercise all branches. +// In non-GIL builds the function always uses memmove, so no flag is needed. +static PyObject * +make_shared_container(void) +{ + PyObject *a = PyList_New(0); + if (a == NULL) { + return NULL; + } +#ifdef Py_GIL_DISABLED + _PyObject_GC_SET_SHARED(a); +#endif + return a; +} + +// Helper: create container, call the function, return it for cleanup. +// Returns NULL (with exception set) on allocation failure. +static PyObject * +call_memmove(PyObject **dest, PyObject **src, Py_ssize_t n) +{ + PyObject *a = make_shared_container(); + if (a != NULL) { + _Py_ptr_wise_atomic_memmove(a, dest, src, n); + } + return a; +} + + +// dest < src: forward pointer-by-pointer copy. +// buf = [0,1,2,3,4], copy src=&buf[2] n=3 to dest=&buf[0] +// Expected result: buf = [2,3,4,3,4] +static PyObject * +test_memmove_dest_lt_src(PyObject *self, PyObject *Py_UNUSED(arg)) +{ + setup_test_objs(); + + PyObject *buf[NPTRS]; + fill_buf(buf); + + PyObject *a = call_memmove(&buf[0], &buf[2], 3); + if (a == NULL) { + return NULL; + } + + assert(buf[0] == test_objs[2]); + assert(buf[1] == test_objs[3]); + assert(buf[2] == test_objs[4]); + assert(buf[3] == test_objs[3]); // unchanged + assert(buf[4] == test_objs[4]); // unchanged + + Py_DECREF(a); + Py_RETURN_NONE; +} + +// dest > src: backward pointer-by-pointer copy. +// buf = [0,1,2,3,4], copy src=&buf[0] n=3 to dest=&buf[2] +// Expected result: buf = [0,1,0,1,2] +static PyObject * +test_memmove_dest_gt_src(PyObject *self, PyObject *Py_UNUSED(arg)) +{ + setup_test_objs(); + + PyObject *buf[NPTRS]; + fill_buf(buf); + + PyObject *a = call_memmove(&buf[2], &buf[0], 3); + if (a == NULL) { + return NULL; + } + + assert(buf[0] == test_objs[0]); // unchanged + assert(buf[1] == test_objs[1]); // unchanged + assert(buf[2] == test_objs[0]); + assert(buf[3] == test_objs[1]); + assert(buf[4] == test_objs[2]); + + Py_DECREF(a); + Py_RETURN_NONE; +} + +// dest == src: backward copy where every write is a no-op. +// buf = [0,1,2,3,4], copy src=&buf[1] n=3 to dest=&buf[1] +// Expected result: buf unchanged. +static PyObject * +test_memmove_dest_eq_src(PyObject *self, PyObject *Py_UNUSED(arg)) +{ + setup_test_objs(); + + PyObject *buf[NPTRS]; + fill_buf(buf); + + PyObject *a = call_memmove(&buf[1], &buf[1], 3); + if (a == NULL) { + return NULL; + } + + for (int i = 0; i < NPTRS; i++) { + assert(buf[i] == test_objs[i]); + } + + Py_DECREF(a); + Py_RETURN_NONE; +} + +// Overlapping ranges, dest < src: forward copy is safe. +// buf = [0,1,2,3,4], copy src=&buf[2] n=3 to dest=&buf[1] +// Forward: buf[1]=buf[2]=2, buf[2]=buf[3]=3, buf[3]=buf[4]=4 +// Expected result: buf = [0,2,3,4,4] +static PyObject * +test_memmove_overlapping(PyObject *self, PyObject *Py_UNUSED(arg)) +{ + setup_test_objs(); + + PyObject *buf[NPTRS]; + fill_buf(buf); + + PyObject *a = call_memmove(&buf[1], &buf[2], 3); + if (a == NULL) { + return NULL; + } + + assert(buf[0] == test_objs[0]); // unchanged + assert(buf[1] == test_objs[2]); + assert(buf[2] == test_objs[3]); + assert(buf[3] == test_objs[4]); + assert(buf[4] == test_objs[4]); // unchanged + + Py_DECREF(a); + Py_RETURN_NONE; +} + +// Single owner fast path (GIL_DISABLED only): object owned by this thread +// and not GC-shared => memmove is used instead of atomic stores. +// In non-GIL builds this always takes the memmove path, so the test +// degenerates to a basic correctness check. +static PyObject * +test_memmove_single_owner(PyObject *self, PyObject *Py_UNUSED(arg)) +{ + setup_test_objs(); + + PyObject *a = PyList_New(0); + if (a == NULL) { + return NULL; + } + +#ifdef Py_GIL_DISABLED + // A freelist-reused list may carry the SHARED bit set by a sibling test. + // Clear it explicitly so this test exercises the single-owner fast path. + _PyObject_CLEAR_GC_BITS(a, _PyGC_BITS_SHARED); + assert(_Py_IsOwnedByCurrentThread(a) && !_PyObject_GC_IS_SHARED(a)); +#endif + + PyObject *buf[NPTRS]; + fill_buf(buf); + + _Py_ptr_wise_atomic_memmove(a, &buf[0], &buf[2], 3); + + assert(buf[0] == test_objs[2]); + assert(buf[1] == test_objs[3]); + assert(buf[2] == test_objs[4]); + + Py_DECREF(a); + Py_RETURN_NONE; +} + + +static PyMethodDef test_methods[] = { + {"test_memmove_dest_lt_src", test_memmove_dest_lt_src, METH_NOARGS}, + {"test_memmove_dest_gt_src", test_memmove_dest_gt_src, METH_NOARGS}, + {"test_memmove_dest_eq_src", test_memmove_dest_eq_src, METH_NOARGS}, + {"test_memmove_overlapping", test_memmove_overlapping, METH_NOARGS}, + {"test_memmove_single_owner", test_memmove_single_owner, METH_NOARGS}, + {NULL}, +}; + +int +_PyTestInternalCapi_Init_PtrWiseMemmove(PyObject *mod) +{ + return PyModule_AddFunctions(mod, test_methods); +} diff --git a/Objects/listobject.c b/Objects/listobject.c index 8a9c9bda68269b..e2044d17d7e957 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -10,7 +10,7 @@ #include "pycore_list.h" // struct _Py_list_freelist, _PyListIterObject #include "pycore_long.h" // _PyLong_DigitCount #include "pycore_modsupport.h" // _PyArg_NoKwnames() -#include "pycore_object.h" // _PyObject_GC_TRACK(), _PyDebugAllocatorStats() +#include "pycore_object.h" // _PyObject_GC_TRACK(), _Py_ptr_wise_atomic_memmove() #include "pycore_pyatomic_ft_wrappers.h" #include "pycore_setobject.h" // _PySet_NextEntry() #include "pycore_stackref.h" // _Py_TryIncrefCompareStackRef() @@ -912,33 +912,6 @@ list_clear_slot(PyObject *self) return 0; } -// Pointer-by-pointer memmove for PyObject** arrays that is safe -// for shared lists in Py_GIL_DISABLED builds. -static void -ptr_wise_atomic_memmove(PyListObject *a, PyObject **dest, PyObject **src, Py_ssize_t n) -{ -#ifndef Py_GIL_DISABLED - memmove(dest, src, n * sizeof(PyObject *)); -#else - _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(a); - if (_Py_IsOwnedByCurrentThread((PyObject *)a) && !_PyObject_GC_IS_SHARED(a)) { - // No other threads can read this list concurrently - memmove(dest, src, n * sizeof(PyObject *)); - return; - } - if (dest < src) { - for (Py_ssize_t i = 0; i != n; i++) { - _Py_atomic_store_ptr_release(&dest[i], src[i]); - } - } - else { - // copy backwards to avoid overwriting src before it's read - for (Py_ssize_t i = n; i != 0; i--) { - _Py_atomic_store_ptr_release(&dest[i - 1], src[i - 1]); - } - } -#endif -} /* a[ilow:ihigh] = v if v != NULL. * del a[ilow:ihigh] if v == NULL. @@ -1011,7 +984,8 @@ list_ass_slice_lock_held(PyListObject *a, Py_ssize_t ilow, Py_ssize_t ihigh, PyO if (d < 0) { /* Delete -d items */ Py_ssize_t tail = Py_SIZE(a) - ihigh; - ptr_wise_atomic_memmove(a, &item[ihigh+d], &item[ihigh], tail); + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(a); + _Py_ptr_wise_atomic_memmove((PyObject *)a, &item[ihigh+d], &item[ihigh], tail); (void)list_resize(a, Py_SIZE(a) + d); // NB: shrinking a list can't fail item = a->ob_item; } @@ -1020,7 +994,8 @@ list_ass_slice_lock_held(PyListObject *a, Py_ssize_t ilow, Py_ssize_t ihigh, PyO if (list_resize(a, k+d) < 0) goto Error; item = a->ob_item; - ptr_wise_atomic_memmove(a, &item[ihigh+d], &item[ihigh], k - ihigh); + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(a); + _Py_ptr_wise_atomic_memmove((PyObject *)a, &item[ihigh+d], &item[ihigh], k - ihigh); } for (k = 0; k < n; k++, ilow++) { PyObject *w = vitem[k]; @@ -1108,10 +1083,11 @@ list_inplace_repeat_lock_held(PyListObject *self, Py_ssize_t n) _Py_memory_repeat((char *)items, sizeof(PyObject *)*output_size, sizeof(PyObject *)*input_size); #else + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(self); Py_ssize_t copied = input_size; while (copied < output_size) { Py_ssize_t items_to_copy = Py_MIN(copied, output_size - copied); - ptr_wise_atomic_memmove(self, items + copied, items, items_to_copy); + _Py_ptr_wise_atomic_memmove((PyObject *)self, items + copied, items, items_to_copy); copied += items_to_copy; } #endif @@ -1609,8 +1585,9 @@ list_pop_impl(PyListObject *self, Py_ssize_t index) } Py_ssize_t size_after_pop = Py_SIZE(self) - 1; if (index < size_after_pop) { - ptr_wise_atomic_memmove(self, &items[index], &items[index+1], - size_after_pop - index); + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(self); + _Py_ptr_wise_atomic_memmove((PyObject *)self, &items[index], &items[index+1], + size_after_pop - index); } list_resize(self, size_after_pop); // NB: shrinking a list can't fail return v; @@ -3793,12 +3770,14 @@ list_ass_subscript_lock_held(PyObject *_self, PyObject *item, PyObject *value) lim = Py_SIZE(self) - cur - 1; } - ptr_wise_atomic_memmove(self, self->ob_item + cur - i, + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(self); + _Py_ptr_wise_atomic_memmove((PyObject *)self, self->ob_item + cur - i, self->ob_item + cur + 1, lim); } cur = start + (size_t)slicelength * step; if (cur < (size_t)Py_SIZE(self)) { - ptr_wise_atomic_memmove(self, self->ob_item + cur - slicelength, + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(self); + _Py_ptr_wise_atomic_memmove((PyObject *)self, self->ob_item + cur - slicelength, self->ob_item + cur, Py_SIZE(self) - cur); } diff --git a/PCbuild/_testinternalcapi.vcxproj b/PCbuild/_testinternalcapi.vcxproj index f3e423fa04668e..dbf0c13445993a 100644 --- a/PCbuild/_testinternalcapi.vcxproj +++ b/PCbuild/_testinternalcapi.vcxproj @@ -97,6 +97,7 @@ +