Skip to content

Commit a50026b

Browse files
torvaldsbrauner
authored andcommitted
iov_iter: get rid of 'copy_mc' flag
This flag is only set by one single user: the magical core dumping code that looks up user pages one by one, and then writes them out using their kernel addresses (by using a BVEC_ITER). That actually ends up being a huge problem, because while we do use copy_mc_to_kernel() for this case and it is able to handle the possible machine checks involved, nothing else is really ready to handle the failures caused by the machine check. In particular, as reported by Tong Tiangen, we don't actually support fault_in_iov_iter_readable() on a machine check area. As a result, the usual logic for writing things to a file under a filesystem lock, which involves doing a copy with page faults disabled and then if that fails trying to fault pages in without holding the locks with fault_in_iov_iter_readable() does not work at all. We could decide to always just make the MC copy "succeed" (and filling the destination with zeroes), and that would then create a core dump file that just ignores any machine checks. But honestly, this single special case has been problematic before, and means that all the normal iov_iter code ends up slightly more complex and slower. See for example commit c9eec08 ("iov_iter: Don't deal with iter->copy_mc in memcpy_from_iter_mc()") where David Howells re-organized the code just to avoid having to check the 'copy_mc' flags inside the inner iov_iter loops. So considering that we have exactly one user, and that one user is a non-critical special case that doesn't actually ever trigger in real life (Tong found this with manual error injection), the sane solution is to just decide that the onus on handling the machine check lines on that user instead. Ergo, do the copy_mc_to_kernel() in the core dump logic itself, copying the user data to a stable kernel page before writing it out. Fixes: f198274 ("iov_iter: Convert iterate*() to inline funcs") Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Tong Tiangen <tongtiangen@huawei.com> Link: https://lore.kernel.org/r/20240305133336.3804360-1-tongtiangen@huawei.com Link: https://lore.kernel.org/all/4e80924d-9c85-f13a-722a-6a5d2b1c225a@huawei.com/ Tested-by: David Howells <dhowells@redhat.com> Reviewed-by: David Howells <dhowells@redhat.com> Reviewed-by: Jens Axboe <axboe@kernel.dk> Reported-by: Tong Tiangen <tongtiangen@huawei.com> Signed-off-by: Christian Brauner <brauner@kernel.org>
1 parent 961ebd1 commit a50026b

3 files changed

Lines changed: 42 additions & 42 deletions

File tree

fs/coredump.c

Lines changed: 42 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -872,6 +872,9 @@ static int dump_emit_page(struct coredump_params *cprm, struct page *page)
872872
loff_t pos;
873873
ssize_t n;
874874

875+
if (!page)
876+
return 0;
877+
875878
if (cprm->to_skip) {
876879
if (!__dump_skip(cprm, cprm->to_skip))
877880
return 0;
@@ -884,7 +887,6 @@ static int dump_emit_page(struct coredump_params *cprm, struct page *page)
884887
pos = file->f_pos;
885888
bvec_set_page(&bvec, page, PAGE_SIZE, 0);
886889
iov_iter_bvec(&iter, ITER_SOURCE, &bvec, 1, PAGE_SIZE);
887-
iov_iter_set_copy_mc(&iter);
888890
n = __kernel_write_iter(cprm->file, &iter, &pos);
889891
if (n != PAGE_SIZE)
890892
return 0;
@@ -895,10 +897,44 @@ static int dump_emit_page(struct coredump_params *cprm, struct page *page)
895897
return 1;
896898
}
897899

900+
/*
901+
* If we might get machine checks from kernel accesses during the
902+
* core dump, let's get those errors early rather than during the
903+
* IO. This is not performance-critical enough to warrant having
904+
* all the machine check logic in the iovec paths.
905+
*/
906+
#ifdef copy_mc_to_kernel
907+
908+
#define dump_page_alloc() alloc_page(GFP_KERNEL)
909+
#define dump_page_free(x) __free_page(x)
910+
static struct page *dump_page_copy(struct page *src, struct page *dst)
911+
{
912+
void *buf = kmap_local_page(src);
913+
size_t left = copy_mc_to_kernel(page_address(dst), buf, PAGE_SIZE);
914+
kunmap_local(buf);
915+
return left ? NULL : dst;
916+
}
917+
918+
#else
919+
920+
/* We just want to return non-NULL; it's never used. */
921+
#define dump_page_alloc() ERR_PTR(-EINVAL)
922+
#define dump_page_free(x) ((void)(x))
923+
static inline struct page *dump_page_copy(struct page *src, struct page *dst)
924+
{
925+
return src;
926+
}
927+
#endif
928+
898929
int dump_user_range(struct coredump_params *cprm, unsigned long start,
899930
unsigned long len)
900931
{
901932
unsigned long addr;
933+
struct page *dump_page;
934+
935+
dump_page = dump_page_alloc();
936+
if (!dump_page)
937+
return 0;
902938

903939
for (addr = start; addr < start + len; addr += PAGE_SIZE) {
904940
struct page *page;
@@ -912,14 +948,17 @@ int dump_user_range(struct coredump_params *cprm, unsigned long start,
912948
*/
913949
page = get_dump_page(addr);
914950
if (page) {
915-
int stop = !dump_emit_page(cprm, page);
951+
int stop = !dump_emit_page(cprm, dump_page_copy(page, dump_page));
916952
put_page(page);
917-
if (stop)
953+
if (stop) {
954+
dump_page_free(dump_page);
918955
return 0;
956+
}
919957
} else {
920958
dump_skip(cprm, PAGE_SIZE);
921959
}
922960
}
961+
dump_page_free(dump_page);
923962
return 1;
924963
}
925964
#endif

include/linux/uio.h

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,6 @@ struct iov_iter_state {
4040

4141
struct iov_iter {
4242
u8 iter_type;
43-
bool copy_mc;
4443
bool nofault;
4544
bool data_source;
4645
size_t iov_offset;
@@ -248,22 +247,8 @@ size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i);
248247

249248
#ifdef CONFIG_ARCH_HAS_COPY_MC
250249
size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i);
251-
static inline void iov_iter_set_copy_mc(struct iov_iter *i)
252-
{
253-
i->copy_mc = true;
254-
}
255-
256-
static inline bool iov_iter_is_copy_mc(const struct iov_iter *i)
257-
{
258-
return i->copy_mc;
259-
}
260250
#else
261251
#define _copy_mc_to_iter _copy_to_iter
262-
static inline void iov_iter_set_copy_mc(struct iov_iter *i) { }
263-
static inline bool iov_iter_is_copy_mc(const struct iov_iter *i)
264-
{
265-
return false;
266-
}
267252
#endif
268253

269254
size_t iov_iter_zero(size_t bytes, struct iov_iter *);
@@ -355,7 +340,6 @@ static inline void iov_iter_ubuf(struct iov_iter *i, unsigned int direction,
355340
WARN_ON(direction & ~(READ | WRITE));
356341
*i = (struct iov_iter) {
357342
.iter_type = ITER_UBUF,
358-
.copy_mc = false,
359343
.data_source = direction,
360344
.ubuf = buf,
361345
.count = count,

lib/iov_iter.c

Lines changed: 0 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,6 @@ void iov_iter_init(struct iov_iter *i, unsigned int direction,
166166
WARN_ON(direction & ~(READ | WRITE));
167167
*i = (struct iov_iter) {
168168
.iter_type = ITER_IOVEC,
169-
.copy_mc = false,
170169
.nofault = false,
171170
.data_source = direction,
172171
.__iov = iov,
@@ -244,27 +243,9 @@ size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
244243
EXPORT_SYMBOL_GPL(_copy_mc_to_iter);
245244
#endif /* CONFIG_ARCH_HAS_COPY_MC */
246245

247-
static __always_inline
248-
size_t memcpy_from_iter_mc(void *iter_from, size_t progress,
249-
size_t len, void *to, void *priv2)
250-
{
251-
return copy_mc_to_kernel(to + progress, iter_from, len);
252-
}
253-
254-
static size_t __copy_from_iter_mc(void *addr, size_t bytes, struct iov_iter *i)
255-
{
256-
if (unlikely(i->count < bytes))
257-
bytes = i->count;
258-
if (unlikely(!bytes))
259-
return 0;
260-
return iterate_bvec(i, bytes, addr, NULL, memcpy_from_iter_mc);
261-
}
262-
263246
static __always_inline
264247
size_t __copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
265248
{
266-
if (unlikely(iov_iter_is_copy_mc(i)))
267-
return __copy_from_iter_mc(addr, bytes, i);
268249
return iterate_and_advance(i, bytes, addr,
269250
copy_from_user_iter, memcpy_from_iter);
270251
}
@@ -633,7 +614,6 @@ void iov_iter_kvec(struct iov_iter *i, unsigned int direction,
633614
WARN_ON(direction & ~(READ | WRITE));
634615
*i = (struct iov_iter){
635616
.iter_type = ITER_KVEC,
636-
.copy_mc = false,
637617
.data_source = direction,
638618
.kvec = kvec,
639619
.nr_segs = nr_segs,
@@ -650,7 +630,6 @@ void iov_iter_bvec(struct iov_iter *i, unsigned int direction,
650630
WARN_ON(direction & ~(READ | WRITE));
651631
*i = (struct iov_iter){
652632
.iter_type = ITER_BVEC,
653-
.copy_mc = false,
654633
.data_source = direction,
655634
.bvec = bvec,
656635
.nr_segs = nr_segs,
@@ -679,7 +658,6 @@ void iov_iter_xarray(struct iov_iter *i, unsigned int direction,
679658
BUG_ON(direction & ~1);
680659
*i = (struct iov_iter) {
681660
.iter_type = ITER_XARRAY,
682-
.copy_mc = false,
683661
.data_source = direction,
684662
.xarray = xarray,
685663
.xarray_start = start,
@@ -703,7 +681,6 @@ void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count)
703681
BUG_ON(direction != READ);
704682
*i = (struct iov_iter){
705683
.iter_type = ITER_DISCARD,
706-
.copy_mc = false,
707684
.data_source = false,
708685
.count = count,
709686
.iov_offset = 0

0 commit comments

Comments
 (0)