Skip to content

Commit 5898b43

Browse files
jchu314atgithub authored and djbw committed
mce: fix set_mce_nospec to always unmap the whole page
The set_memory_uc() approach doesn't work well in all cases. As Dan pointed out, when "The VMM unmapped the bad page from guest physical space and passed the machine check to the guest." "The guest gets virtual #MC on an access to that page. When the guest tries to do set_memory_uc() and instructs cpa_flush() to do clean caches that results in taking another fault / exception perhaps because the VMM unmapped the page from the guest." Since the driver has special knowledge to handle NP or UC, mark the poisoned page as NP and let the driver handle it when it comes to repair. Please refer to the discussion here for more details: https://lore.kernel.org/all/CAPcyv4hrXPb1tASBZUg-GgdVs0OOFKXMXLiHmktg_kFi7YBMyQ@mail.gmail.com/ Now that the poisoned page is marked as not-present, also fix pmem_do_write() to avoid writing to a not-present page and triggering a kernel Oops. Fixes: 284ce40 ("x86/memory_failure: Introduce {set, clear}_mce_nospec()") Reviewed-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Dan Williams <dan.j.williams@intel.com> Signed-off-by: Jane Chu <jane.chu@oracle.com> Acked-by: Tony Luck <tony.luck@intel.com> Link: https://lore.kernel.org/r/165272615484.103830.2563950688772226611.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams <dan.j.williams@intel.com>
1 parent b3fdf93 commit 5898b43

4 files changed

Lines changed: 23 additions & 40 deletions

File tree

arch/x86/kernel/cpu/mce/core.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -579,7 +579,7 @@ static int uc_decode_notifier(struct notifier_block *nb, unsigned long val,
579579

580580
pfn = mce->addr >> PAGE_SHIFT;
581581
if (!memory_failure(pfn, 0)) {
582-
set_mce_nospec(pfn, whole_page(mce));
582+
set_mce_nospec(pfn);
583583
mce->kflags |= MCE_HANDLED_UC;
584584
}
585585

@@ -1316,7 +1316,7 @@ static void kill_me_maybe(struct callback_head *cb)
13161316

13171317
ret = memory_failure(p->mce_addr >> PAGE_SHIFT, flags);
13181318
if (!ret) {
1319-
set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page);
1319+
set_mce_nospec(p->mce_addr >> PAGE_SHIFT);
13201320
sync_core();
13211321
return;
13221322
}
@@ -1342,7 +1342,7 @@ static void kill_me_never(struct callback_head *cb)
13421342
p->mce_count = 0;
13431343
pr_err("Kernel accessed poison in user space at %llx\n", p->mce_addr);
13441344
if (!memory_failure(p->mce_addr >> PAGE_SHIFT, 0))
1345-
set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page);
1345+
set_mce_nospec(p->mce_addr >> PAGE_SHIFT);
13461346
}
13471347

13481348
static void queue_task_work(struct mce *m, char *msg, void (*func)(struct callback_head *))

arch/x86/mm/pat/set_memory.c

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1925,14 +1925,9 @@ int set_memory_wb(unsigned long addr, int numpages)
19251925
}
19261926
EXPORT_SYMBOL(set_memory_wb);
19271927

1928-
/*
1929-
* Prevent speculative access to the page by either unmapping
1930-
* it (if we do not require access to any part of the page) or
1931-
* marking it uncacheable (if we want to try to retrieve data
1932-
* from non-poisoned lines in the page).
1933-
*/
1928+
/* Prevent speculative access to a page by marking it not-present */
19341929
#ifdef CONFIG_X86_64
1935-
int set_mce_nospec(unsigned long pfn, bool unmap)
1930+
int set_mce_nospec(unsigned long pfn)
19361931
{
19371932
unsigned long decoy_addr;
19381933
int rc;
@@ -1954,19 +1949,23 @@ int set_mce_nospec(unsigned long pfn, bool unmap)
19541949
*/
19551950
decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63));
19561951

1957-
if (unmap)
1958-
rc = set_memory_np(decoy_addr, 1);
1959-
else
1960-
rc = set_memory_uc(decoy_addr, 1);
1952+
rc = set_memory_np(decoy_addr, 1);
19611953
if (rc)
19621954
pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn);
19631955
return rc;
19641956
}
19651957

1958+
static int set_memory_present(unsigned long *addr, int numpages)
1959+
{
1960+
return change_page_attr_set(addr, numpages, __pgprot(_PAGE_PRESENT), 0);
1961+
}
1962+
19661963
/* Restore full speculative operation to the pfn. */
19671964
int clear_mce_nospec(unsigned long pfn)
19681965
{
1969-
return set_memory_wb((unsigned long) pfn_to_kaddr(pfn), 1);
1966+
unsigned long addr = (unsigned long) pfn_to_kaddr(pfn);
1967+
1968+
return set_memory_present(&addr, 1);
19701969
}
19711970
EXPORT_SYMBOL_GPL(clear_mce_nospec);
19721971
#endif /* CONFIG_X86_64 */

drivers/nvdimm/pmem.c

Lines changed: 7 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -158,36 +158,20 @@ static blk_status_t pmem_do_write(struct pmem_device *pmem,
158158
struct page *page, unsigned int page_off,
159159
sector_t sector, unsigned int len)
160160
{
161-
blk_status_t rc = BLK_STS_OK;
162-
bool bad_pmem = false;
163161
phys_addr_t pmem_off = sector * 512 + pmem->data_offset;
164162
void *pmem_addr = pmem->virt_addr + pmem_off;
165163

166-
if (unlikely(is_bad_pmem(&pmem->bb, sector, len)))
167-
bad_pmem = true;
164+
if (unlikely(is_bad_pmem(&pmem->bb, sector, len))) {
165+
blk_status_t rc = pmem_clear_poison(pmem, pmem_off, len);
166+
167+
if (rc != BLK_STS_OK)
168+
return rc;
169+
}
168170

169-
/*
170-
* Note that we write the data both before and after
171-
* clearing poison. The write before clear poison
172-
* handles situations where the latest written data is
173-
* preserved and the clear poison operation simply marks
174-
* the address range as valid without changing the data.
175-
* In this case application software can assume that an
176-
* interrupted write will either return the new good
177-
* data or an error.
178-
*
179-
* However, if pmem_clear_poison() leaves the data in an
180-
* indeterminate state we need to perform the write
181-
* after clear poison.
182-
*/
183171
flush_dcache_page(page);
184172
write_pmem(pmem_addr, page, page_off, len);
185-
if (unlikely(bad_pmem)) {
186-
rc = pmem_clear_poison(pmem, pmem_off, len);
187-
write_pmem(pmem_addr, page, page_off, len);
188-
}
189173

190-
return rc;
174+
return BLK_STS_OK;
191175
}
192176

193177
static void pmem_submit_bio(struct bio *bio)

include/linux/set_memory.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,10 +43,10 @@ static inline bool can_set_direct_map(void)
4343
#endif /* CONFIG_ARCH_HAS_SET_DIRECT_MAP */
4444

4545
#ifdef CONFIG_X86_64
46-
int set_mce_nospec(unsigned long pfn, bool unmap);
46+
int set_mce_nospec(unsigned long pfn);
4747
int clear_mce_nospec(unsigned long pfn);
4848
#else
49-
static inline int set_mce_nospec(unsigned long pfn, bool unmap)
49+
static inline int set_mce_nospec(unsigned long pfn)
5050
{
5151
return 0;
5252
}

0 commit comments

Comments
 (0)