Skip to content

Commit 3b68df9

Browse files
compudj authored and Peter Zijlstra committed
rseq: slice ext: Ensure rseq feature size differs from original rseq size
Before rseq became extensible, its original size was 32 bytes even though the active rseq area was only 20 bytes. This had the following impact in terms of userspace ecosystem evolution:

* The GNU libc between 2.35 and 2.39 exposes a __rseq_size symbol set to 32, even though the size of the active rseq area is really 20.
* The GNU libc 2.40 changes this __rseq_size to 20, thus making it express the active rseq area.
* Starting from glibc 2.41, __rseq_size corresponds to the AT_RSEQ_FEATURE_SIZE from getauxval(3).

This means that users of __rseq_size can always expect it to correspond to the active rseq area, except for the value 32, for which the active rseq area is 20 bytes.

Exposing a 32-byte feature size would make life needlessly painful for userspace. Therefore, add a reserved field at the end of the rseq area to bump the feature size to 33 bytes. This reserved field is expected to be replaced with whatever field will come next, expecting that this field will be larger than 1 byte.

The effect of this change is to increase the size from 32 to 64 bytes before we actually have fields using that memory.

Clarify the allocation size and alignment requirements in the struct rseq uapi comment.

Change the value returned by getauxval(AT_RSEQ_ALIGN) to return the value of the active rseq area size rounded up to the next power of 2, which guarantees that the rseq structure will always be aligned on the nearest power of two large enough to contain it, even as it grows. Change the alignment check in the rseq registration accordingly.

This will minimize the amount of ABI corner-cases we need to document and require userspace to play games with.
The rule stays simple when __rseq_size != 32:

    #define rseq_field_available(field) (__rseq_size >= offsetofend(struct rseq_abi, field))

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260220200642.1317826-3-mathieu.desnoyers@efficios.com
1 parent 26d43a9 commit 3b68df9

File tree

4 files changed

+38
-6
lines changed

4 files changed

+38
-6
lines changed

fs/binfmt_elf.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
#include <linux/dax.h>
4848
#include <linux/uaccess.h>
4949
#include <uapi/linux/rseq.h>
50+
#include <linux/rseq.h>
5051
#include <asm/param.h>
5152
#include <asm/page.h>
5253

@@ -286,7 +287,7 @@ create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec,
286287
}
287288
#ifdef CONFIG_RSEQ
288289
NEW_AUX_ENT(AT_RSEQ_FEATURE_SIZE, offsetof(struct rseq, end));
289-
NEW_AUX_ENT(AT_RSEQ_ALIGN, __alignof__(struct rseq));
290+
NEW_AUX_ENT(AT_RSEQ_ALIGN, rseq_alloc_align());
290291
#endif
291292
#undef NEW_AUX_ENT
292293
/* AT_NULL is zero; clear the rest too */

include/linux/rseq.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,18 @@ static inline void rseq_fork(struct task_struct *t, u64 clone_flags)
146146
t->rseq = current->rseq;
147147
}
148148

149+
/*
150+
* Value returned by getauxval(AT_RSEQ_ALIGN) and expected by rseq
151+
* registration. This is the active rseq area size rounded up to next
152+
* power of 2, which guarantees that the rseq structure will always be
153+
* aligned on the nearest power of two large enough to contain it, even
154+
* as it grows.
155+
*/
156+
static inline unsigned int rseq_alloc_align(void)
157+
{
158+
return 1U << get_count_order(offsetof(struct rseq, end));
159+
}
160+
149161
#else /* CONFIG_RSEQ */
150162
static inline void rseq_handle_slowpath(struct pt_regs *regs) { }
151163
static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs) { }

include/uapi/linux/rseq.h

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -87,10 +87,17 @@ struct rseq_slice_ctrl {
8787
};
8888

8989
/*
90-
* struct rseq is aligned on 4 * 8 bytes to ensure it is always
91-
* contained within a single cache-line.
90+
* The original size and alignment of the allocation for struct rseq is
91+
* 32 bytes.
9292
*
93-
* A single struct rseq per thread is allowed.
93+
* The allocation size needs to be greater or equal to
94+
* max(getauxval(AT_RSEQ_FEATURE_SIZE), 32), and the allocation needs to
95+
* be aligned on max(getauxval(AT_RSEQ_ALIGN), 32).
96+
*
97+
* As an alternative, userspace is allowed to use both the original size
98+
* and alignment of 32 bytes for backward compatibility.
99+
*
100+
* A single active struct rseq registration per thread is allowed.
94101
*/
95102
struct rseq {
96103
/*
@@ -180,10 +187,21 @@ struct rseq {
180187
*/
181188
struct rseq_slice_ctrl slice_ctrl;
182189

190+
/*
191+
* Before rseq became extensible, its original size was 32 bytes even
192+
* though the active rseq area was only 20 bytes.
193+
* Exposing a 32 bytes feature size would make life needlessly painful
194+
* for userspace. Therefore, add a reserved byte after byte 32
195+
* to bump the rseq feature size from 32 to 33.
196+
* The next field to be added to the rseq area will be larger
197+
* than one byte, and will replace this reserved byte.
198+
*/
199+
__u8 __reserved;
200+
183201
/*
184202
* Flexible array member at end of structure, after last feature field.
185203
*/
186204
char end[];
187-
} __attribute__((aligned(4 * sizeof(__u64))));
205+
} __attribute__((aligned(32)));
188206

189207
#endif /* _UAPI_LINUX_RSEQ_H */

kernel/rseq.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@
8080
#include <linux/syscalls.h>
8181
#include <linux/uaccess.h>
8282
#include <linux/types.h>
83+
#include <linux/rseq.h>
8384
#include <asm/ptrace.h>
8485

8586
#define CREATE_TRACE_POINTS
@@ -456,7 +457,7 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, int, flags, u32
456457
*/
457458
if (rseq_len < ORIG_RSEQ_SIZE ||
458459
(rseq_len == ORIG_RSEQ_SIZE && !IS_ALIGNED((unsigned long)rseq, ORIG_RSEQ_SIZE)) ||
459-
(rseq_len != ORIG_RSEQ_SIZE && (!IS_ALIGNED((unsigned long)rseq, __alignof__(*rseq)) ||
460+
(rseq_len != ORIG_RSEQ_SIZE && (!IS_ALIGNED((unsigned long)rseq, rseq_alloc_align()) ||
460461
rseq_len < offsetof(struct rseq, end))))
461462
return -EINVAL;
462463
if (!access_ok(rseq, rseq_len))

0 commit comments

Comments
 (0)