Skip to content

Commit a148a20

Browse files
jchu314atgithub authored and akpm00 committed
mm/memory-failure: fix missing ->mf_stats count in hugetlb poison
When a newly poisoned subpage ends up in an already poisoned hugetlb folio, 'num_poisoned_pages' is incremented, but the per-node ->mf_stats is not. Fix the inconsistency by designating action_result() to update them both. While at it, define __get_huge_page_for_hwpoison() return values in terms of symbolic names for better readability. Also rename folio_set_hugetlb_hwpoison() to hugetlb_update_hwpoison() since the function does more than the conventional bit setting and three possible return values are expected. Link: https://lkml.kernel.org/r/20260120232234.3462258-1-jane.chu@oracle.com Fixes: 18f41fa ("mm: memory-failure: bump memory failure stats to pglist_data") Signed-off-by: Jane Chu <jane.chu@oracle.com> Acked-by: Miaohe Lin <linmiaohe@huawei.com> Cc: Chris Mason <clm@meta.com> Cc: David Hildenbrand <david@kernel.org> Cc: David Rientjes <rientjes@google.com> Cc: Jiaqi Yan <jiaqiyan@google.com> Cc: Liam R. Howlett <Liam.Howlett@oracle.com> Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> Cc: Matthew Wilcox (Oracle) <willy@infradead.org> Cc: Michal Hocko <mhocko@suse.com> Cc: Mike Rapoport <rppt@kernel.org> Cc: Muchun Song <muchun.song@linux.dev> Cc: Naoya Horiguchi <nao.horiguchi@gmail.com> Cc: Oscar Salvador <osalvador@suse.de> Cc: Suren Baghdasaryan <surenb@google.com> Cc: William Roche <william.roche@oracle.com> Cc: <stable@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
1 parent a0f3c08 commit a148a20

1 file changed

Lines changed: 56 additions & 37 deletions

File tree

mm/memory-failure.c

Lines changed: 56 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -1883,33 +1883,40 @@ static unsigned long __folio_free_raw_hwp(struct folio *folio, bool move_flag)
18831883
return count;
18841884
}
18851885

1886-
static int folio_set_hugetlb_hwpoison(struct folio *folio, struct page *page)
1886+
#define MF_HUGETLB_FREED 0 /* freed hugepage */
1887+
#define MF_HUGETLB_IN_USED 1 /* in-use hugepage */
1888+
#define MF_HUGETLB_NON_HUGEPAGE 2 /* not a hugepage */
1889+
#define MF_HUGETLB_FOLIO_PRE_POISONED 3 /* folio already poisoned */
1890+
#define MF_HUGETLB_PAGE_PRE_POISONED 4 /* exact page already poisoned */
1891+
#define MF_HUGETLB_RETRY 5 /* hugepage is busy, retry */
1892+
/*
1893+
* Set hugetlb folio as hwpoisoned, update folio private raw hwpoison list
1894+
* to keep track of the poisoned pages.
1895+
*/
1896+
static int hugetlb_update_hwpoison(struct folio *folio, struct page *page)
18871897
{
18881898
struct llist_head *head;
18891899
struct raw_hwp_page *raw_hwp;
18901900
struct raw_hwp_page *p;
1891-
int ret = folio_test_set_hwpoison(folio) ? -EHWPOISON : 0;
1901+
int ret = folio_test_set_hwpoison(folio) ? MF_HUGETLB_FOLIO_PRE_POISONED : 0;
18921902

18931903
/*
18941904
* Once the hwpoison hugepage has lost reliable raw error info,
18951905
* there is little meaning to keep additional error info precisely,
18961906
* so skip to add additional raw error info.
18971907
*/
18981908
if (folio_test_hugetlb_raw_hwp_unreliable(folio))
1899-
return -EHWPOISON;
1909+
return MF_HUGETLB_FOLIO_PRE_POISONED;
19001910
head = raw_hwp_list_head(folio);
19011911
llist_for_each_entry(p, head->first, node) {
19021912
if (p->page == page)
1903-
return -EHWPOISON;
1913+
return MF_HUGETLB_PAGE_PRE_POISONED;
19041914
}
19051915

19061916
raw_hwp = kmalloc(sizeof(struct raw_hwp_page), GFP_ATOMIC);
19071917
if (raw_hwp) {
19081918
raw_hwp->page = page;
19091919
llist_add(&raw_hwp->node, head);
1910-
/* the first error event will be counted in action_result(). */
1911-
if (ret)
1912-
num_poisoned_pages_inc(page_to_pfn(page));
19131920
} else {
19141921
/*
19151922
* Failed to save raw error info. We no longer trace all
@@ -1957,42 +1964,39 @@ void folio_clear_hugetlb_hwpoison(struct folio *folio)
19571964

19581965
/*
19591966
* Called from hugetlb code with hugetlb_lock held.
1960-
*
1961-
* Return values:
1962-
* 0 - free hugepage
1963-
* 1 - in-use hugepage
1964-
* 2 - not a hugepage
1965-
* -EBUSY - the hugepage is busy (try to retry)
1966-
* -EHWPOISON - the hugepage is already hwpoisoned
19671967
*/
19681968
int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
19691969
bool *migratable_cleared)
19701970
{
19711971
struct page *page = pfn_to_page(pfn);
19721972
struct folio *folio = page_folio(page);
1973-
int ret = 2; /* fallback to normal page handling */
19741973
bool count_increased = false;
1974+
int ret, rc;
19751975

1976-
if (!folio_test_hugetlb(folio))
1976+
if (!folio_test_hugetlb(folio)) {
1977+
ret = MF_HUGETLB_NON_HUGEPAGE;
19771978
goto out;
1978-
1979-
if (flags & MF_COUNT_INCREASED) {
1980-
ret = 1;
1979+
} else if (flags & MF_COUNT_INCREASED) {
1980+
ret = MF_HUGETLB_IN_USED;
19811981
count_increased = true;
19821982
} else if (folio_test_hugetlb_freed(folio)) {
1983-
ret = 0;
1983+
ret = MF_HUGETLB_FREED;
19841984
} else if (folio_test_hugetlb_migratable(folio)) {
1985-
ret = folio_try_get(folio);
1986-
if (ret)
1985+
if (folio_try_get(folio)) {
1986+
ret = MF_HUGETLB_IN_USED;
19871987
count_increased = true;
1988+
} else {
1989+
ret = MF_HUGETLB_FREED;
1990+
}
19881991
} else {
1989-
ret = -EBUSY;
1992+
ret = MF_HUGETLB_RETRY;
19901993
if (!(flags & MF_NO_RETRY))
19911994
goto out;
19921995
}
19931996

1994-
if (folio_set_hugetlb_hwpoison(folio, page)) {
1995-
ret = -EHWPOISON;
1997+
rc = hugetlb_update_hwpoison(folio, page);
1998+
if (rc >= MF_HUGETLB_FOLIO_PRE_POISONED) {
1999+
ret = rc;
19962000
goto out;
19972001
}
19982002

@@ -2017,10 +2021,16 @@ int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
20172021
* with basic operations like hugepage allocation/free/demotion.
20182022
* So some of prechecks for hwpoison (pinning, and testing/setting
20192023
* PageHWPoison) should be done in single hugetlb_lock range.
2024+
* Returns:
2025+
* 0 - not hugetlb, or recovered
2026+
* -EBUSY - not recovered
2027+
* -EOPNOTSUPP - hwpoison_filter'ed
2028+
* -EHWPOISON - folio or exact page already poisoned
2029+
* -EFAULT - kill_accessing_process finds current->mm null
20202030
*/
20212031
static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb)
20222032
{
2023-
int res;
2033+
int res, rv;
20242034
struct page *p = pfn_to_page(pfn);
20252035
struct folio *folio;
20262036
unsigned long page_flags;
@@ -2029,22 +2039,31 @@ static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb
20292039
*hugetlb = 1;
20302040
retry:
20312041
res = get_huge_page_for_hwpoison(pfn, flags, &migratable_cleared);
2032-
if (res == 2) { /* fallback to normal page handling */
2042+
switch (res) {
2043+
case MF_HUGETLB_NON_HUGEPAGE: /* fallback to normal page handling */
20332044
*hugetlb = 0;
20342045
return 0;
2035-
} else if (res == -EHWPOISON) {
2036-
if (flags & MF_ACTION_REQUIRED) {
2037-
folio = page_folio(p);
2038-
res = kill_accessing_process(current, folio_pfn(folio), flags);
2039-
}
2040-
action_result(pfn, MF_MSG_ALREADY_POISONED, MF_FAILED);
2041-
return res;
2042-
} else if (res == -EBUSY) {
2046+
case MF_HUGETLB_RETRY:
20432047
if (!(flags & MF_NO_RETRY)) {
20442048
flags |= MF_NO_RETRY;
20452049
goto retry;
20462050
}
20472051
return action_result(pfn, MF_MSG_GET_HWPOISON, MF_IGNORED);
2052+
case MF_HUGETLB_FOLIO_PRE_POISONED:
2053+
case MF_HUGETLB_PAGE_PRE_POISONED:
2054+
rv = -EHWPOISON;
2055+
if (flags & MF_ACTION_REQUIRED) {
2056+
folio = page_folio(p);
2057+
rv = kill_accessing_process(current, folio_pfn(folio), flags);
2058+
}
2059+
if (res == MF_HUGETLB_PAGE_PRE_POISONED)
2060+
action_result(pfn, MF_MSG_ALREADY_POISONED, MF_FAILED);
2061+
else
2062+
action_result(pfn, MF_MSG_HUGE, MF_FAILED);
2063+
return rv;
2064+
default:
2065+
WARN_ON((res != MF_HUGETLB_FREED) && (res != MF_HUGETLB_IN_USED));
2066+
break;
20482067
}
20492068

20502069
folio = page_folio(p);
@@ -2055,7 +2074,7 @@ static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb
20552074
if (migratable_cleared)
20562075
folio_set_hugetlb_migratable(folio);
20572076
folio_unlock(folio);
2058-
if (res == 1)
2077+
if (res == MF_HUGETLB_IN_USED)
20592078
folio_put(folio);
20602079
return -EOPNOTSUPP;
20612080
}
@@ -2064,7 +2083,7 @@ static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb
20642083
* Handling free hugepage. The possible race with hugepage allocation
20652084
* or demotion can be prevented by PageHWPoison flag.
20662085
*/
2067-
if (res == 0) {
2086+
if (res == MF_HUGETLB_FREED) {
20682087
folio_unlock(folio);
20692088
if (__page_handle_poison(p) > 0) {
20702089
page_ref_inc(p);

0 commit comments

Comments
 (0)