@@ -2045,112 +2045,63 @@ static unsigned long __init deferred_init_pages(struct zone *zone,
 }
 
 /*
- * This function is meant to pre-load the iterator for the zone init from
- * a given point.
- * Specifically it walks through the ranges starting with initial index
- * passed to it until we are caught up to the first_init_pfn value and
- * exits there. If we never encounter the value we return false indicating
- * there are no valid ranges left.
- */
-static bool __init
-deferred_init_mem_pfn_range_in_zone(u64 *i, struct zone *zone,
-                                    unsigned long *spfn, unsigned long *epfn,
-                                    unsigned long first_init_pfn)
-{
-        u64 j = *i;
-
-        if (j == 0)
-                __next_mem_pfn_range_in_zone(&j, zone, spfn, epfn);
-
-        /*
-         * Start out by walking through the ranges in this zone that have
-         * already been initialized. We don't need to do anything with them
-         * so we just need to flush them out of the system.
-         */
-        for_each_free_mem_pfn_range_in_zone_from(j, zone, spfn, epfn) {
-                if (*epfn <= first_init_pfn)
-                        continue;
-                if (*spfn < first_init_pfn)
-                        *spfn = first_init_pfn;
-                *i = j;
-                return true;
-        }
-
-        return false;
-}
-
-/*
- * Initialize and free pages. We do it in two loops: first we initialize
- * struct page, then free to buddy allocator, because while we are
- * freeing pages we can access pages that are ahead (computing buddy
- * page in __free_one_page()).
+ * Initialize and free pages.
+ *
+ * At this point reserved pages and struct pages that correspond to holes in
+ * memblock.memory are already initialized, so every free range has a valid
+ * memory map around it.
+ * This ensures that accesses to pages ahead of the range being initialized
+ * (computing the buddy page in __free_one_page()) always read a valid
+ * struct page.
  *
- * In order to try and keep some memory in the cache we have the loop
- * broken along max page order boundaries. This way we will not cause
- * any issues with the buddy page computation.
+ * To improve CPU cache locality, the loop is broken along max page order
+ * boundaries.
  */
 static unsigned long __init
-deferred_init_maxorder(u64 *i, struct zone *zone, unsigned long *start_pfn,
-                       unsigned long *end_pfn)
+deferred_init_memmap_chunk(unsigned long start_pfn, unsigned long end_pfn,
+                           struct zone *zone)
 {
-        unsigned long mo_pfn = ALIGN(*start_pfn + 1, MAX_ORDER_NR_PAGES);
-        unsigned long spfn = *start_pfn, epfn = *end_pfn;
+        int nid = zone_to_nid(zone);
         unsigned long nr_pages = 0;
-        u64 j = *i;
-
-        /* First we loop through and initialize the page values */
-        for_each_free_mem_pfn_range_in_zone_from(j, zone, start_pfn, end_pfn) {
-                unsigned long t;
-
-                if (mo_pfn <= *start_pfn)
-                        break;
+        phys_addr_t start, end;
+        u64 i = 0;
 
-                t = min(mo_pfn, *end_pfn);
-                nr_pages += deferred_init_pages(zone, *start_pfn, t);
+        for_each_free_mem_range(i, nid, 0, &start, &end, NULL) {
+                unsigned long spfn = PFN_UP(start);
+                unsigned long epfn = PFN_DOWN(end);
 
-                if (mo_pfn < *end_pfn) {
-                        *start_pfn = mo_pfn;
+                if (spfn >= end_pfn)
                         break;
-                }
-        }
 
-        /* Reset values and now loop through freeing pages as needed */
-        swap(j, *i);
+                spfn = max(spfn, start_pfn);
+                epfn = min(epfn, end_pfn);
 
-        for_each_free_mem_pfn_range_in_zone_from(j, zone, &spfn, &epfn) {
-                unsigned long t;
+                while (spfn < epfn) {
+                        unsigned long mo_pfn = ALIGN(spfn + 1, MAX_ORDER_NR_PAGES);
+                        unsigned long chunk_end = min(mo_pfn, epfn);
 
-                if (mo_pfn <= spfn)
-                        break;
+                        nr_pages += deferred_init_pages(zone, spfn, chunk_end);
+                        deferred_free_pages(spfn, chunk_end - spfn);
 
-                t = min(mo_pfn, epfn);
-                deferred_free_pages(spfn, t - spfn);
+                        spfn = chunk_end;
 
-                if (mo_pfn <= epfn)
-                        break;
+                        if (irqs_disabled())
+                                touch_nmi_watchdog();
+                        else
+                                cond_resched();
+                }
         }
 
         return nr_pages;
 }
 
 static void __init
-deferred_init_memmap_chunk(unsigned long start_pfn, unsigned long end_pfn,
-                           void *arg)
+deferred_init_memmap_job(unsigned long start_pfn, unsigned long end_pfn,
+                         void *arg)
 {
-        unsigned long spfn, epfn;
         struct zone *zone = arg;
-        u64 i = 0;
-
-        deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn, start_pfn);
 
-        /*
-         * Initialize and free pages in MAX_PAGE_ORDER sized increments so that
-         * we can avoid introducing any issues with the buddy allocator.
-         */
-        while (spfn < end_pfn) {
-                deferred_init_maxorder(&i, zone, &spfn, &epfn);
-                cond_resched();
-        }
+        deferred_init_memmap_chunk(start_pfn, end_pfn, zone);
 }
 
 static unsigned int __init
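
A note on the chunking arithmetic above: ALIGN(spfn + 1, MAX_ORDER_NR_PAGES) uses spfn + 1 rather than spfn so that a pfn already sitting on a MAX_ORDER boundary advances to the *next* boundary, guaranteeing the inner loop always makes forward progress. A minimal userspace sketch of this loop shape (not kernel code; MAX_ORDER_NR_PAGES is hardcoded to 1024 purely for illustration):

/* Standalone sketch of the MAX_ORDER chunking loop; illustrative only. */
#include <stdio.h>

#define MAX_ORDER_NR_PAGES 1024UL                       /* assumed value */
#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))      /* kernel-style ALIGN */

int main(void)
{
        unsigned long spfn = 2048;      /* already on a MAX_ORDER boundary */
        unsigned long epfn = 4500;

        while (spfn < epfn) {
                /* spfn + 1 guarantees mo_pfn > spfn even when spfn is aligned */
                unsigned long mo_pfn = ALIGN(spfn + 1, MAX_ORDER_NR_PAGES);
                unsigned long chunk_end = mo_pfn < epfn ? mo_pfn : epfn;

                printf("init+free pfns [%lu, %lu)\n", spfn, chunk_end);
                spfn = chunk_end;
        }
        return 0;
}

Running it prints [2048, 3072), [3072, 4096), [4096, 4500): every chunk ends on a MAX_ORDER boundary except the last, so buddy-page computation during freeing never crosses into uninitialized memory map.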
@@ -2164,12 +2115,10 @@ static int __init deferred_init_memmap(void *data)
 {
         pg_data_t *pgdat = data;
         const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
-        unsigned long spfn = 0, epfn = 0;
-        unsigned long first_init_pfn, flags;
+        int max_threads = deferred_page_init_max_threads(cpumask);
+        unsigned long first_init_pfn, last_pfn, flags;
         unsigned long start = jiffies;
         struct zone *zone;
-        int max_threads;
-        u64 i = 0;
 
         /* Bind memory initialisation thread to a local node if possible */
         if (!cpumask_empty(cpumask))
@@ -2197,24 +2146,20 @@ static int __init deferred_init_memmap(void *data)
 
         /* Only the highest zone is deferred */
         zone = pgdat->node_zones + pgdat->nr_zones - 1;
+        last_pfn = SECTION_ALIGN_UP(zone_end_pfn(zone));
 
-        max_threads = deferred_page_init_max_threads(cpumask);
+        struct padata_mt_job job = {
+                .thread_fn = deferred_init_memmap_job,
+                .fn_arg = zone,
+                .start = first_init_pfn,
+                .size = last_pfn - first_init_pfn,
+                .align = PAGES_PER_SECTION,
+                .min_chunk = PAGES_PER_SECTION,
+                .max_threads = max_threads,
+                .numa_aware = false,
+        };
 
-        while (deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn, first_init_pfn)) {
-                first_init_pfn = ALIGN(epfn, PAGES_PER_SECTION);
-                struct padata_mt_job job = {
-                        .thread_fn = deferred_init_memmap_chunk,
-                        .fn_arg = zone,
-                        .start = spfn,
-                        .size = first_init_pfn - spfn,
-                        .align = PAGES_PER_SECTION,
-                        .min_chunk = PAGES_PER_SECTION,
-                        .max_threads = max_threads,
-                        .numa_aware = false,
-                };
-
-                padata_do_multithreaded(&job);
-        }
+        padata_do_multithreaded(&job);
 
         /* Sanity check that the next zone really is unpopulated */
         WARN_ON(pgdat->nr_zones < MAX_NR_ZONES && populated_zone(++zone));
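
With a single padata_mt_job now covering [first_init_pfn, last_pfn), padata_do_multithreaded() takes over the range splitting: conceptually it carves [start, start + size) into chunks of at least min_chunk units, keeps chunk boundaries aligned to align, and invokes thread_fn(chunk_start, chunk_end, fn_arg) from up to max_threads workers. A rough single-threaded sketch of that division, under simplified chunk sizing (the real padata implementation also load-balances work across kernel worker threads):

/*
 * Rough sketch of how a padata_mt_job's range is divided; single-threaded
 * and simplified. Field names mirror struct padata_mt_job; everything else
 * is illustrative, not the real padata code.
 */
#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))

struct mt_job_sketch {
        void (*thread_fn)(unsigned long start, unsigned long end, void *arg);
        void *fn_arg;
        unsigned long start, size, align, min_chunk;
        int max_threads;
};

static void do_multithreaded_sketch(struct mt_job_sketch *job)
{
        unsigned long end = job->start + job->size;
        unsigned long chunk = job->size / job->max_threads;
        unsigned long pos;

        if (chunk < job->min_chunk)
                chunk = job->min_chunk;
        /* keep chunk boundaries aligned, here to PAGES_PER_SECTION */
        chunk = ALIGN(chunk, job->align);

        for (pos = job->start; pos < end; pos += chunk) {
                unsigned long stop = pos + chunk < end ? pos + chunk : end;

                /* in real padata this call runs on a worker thread */
                job->thread_fn(pos, stop, job->fn_arg);
        }
}

Because min_chunk and align are both PAGES_PER_SECTION, every worker receives whole sections, which is what lets deferred_init_memmap_chunk() treat its range independently of its neighbours.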
@@ -2239,12 +2184,11 @@ static int __init deferred_init_memmap(void *data)
  */
 bool __init deferred_grow_zone(struct zone *zone, unsigned int order)
 {
-        unsigned long nr_pages_needed = ALIGN(1 << order, PAGES_PER_SECTION);
+        unsigned long nr_pages_needed = SECTION_ALIGN_UP(1 << order);
         pg_data_t *pgdat = zone->zone_pgdat;
         unsigned long first_deferred_pfn = pgdat->first_deferred_pfn;
         unsigned long spfn, epfn, flags;
         unsigned long nr_pages = 0;
-        u64 i = 0;
 
         /* Only the last zone may have deferred pages */
         if (zone_end_pfn(zone) != pgdat_end_pfn(pgdat))
@@ -2261,37 +2205,26 @@ bool __init deferred_grow_zone(struct zone *zone, unsigned int order)
                 return true;
         }
 
-        /* If the zone is empty somebody else may have cleared out the zone */
-        if (!deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn,
-                                                 first_deferred_pfn)) {
-                pgdat->first_deferred_pfn = ULONG_MAX;
-                pgdat_resize_unlock(pgdat, &flags);
-                /* Retry only once. */
-                return first_deferred_pfn != ULONG_MAX;
+        /*
+         * Initialize at least nr_pages_needed in section-sized chunks.
+         * If a section has less free memory than nr_pages_needed, the next
+         * section will also be initialized.
+         * Note that this still does not guarantee that an allocation of the
+         * given order can be satisfied if the sections are fragmented by
+         * memblock allocations.
+         */
+        for (spfn = first_deferred_pfn, epfn = SECTION_ALIGN_UP(spfn + 1);
+             nr_pages < nr_pages_needed && spfn < zone_end_pfn(zone);
+             spfn = epfn, epfn += PAGES_PER_SECTION) {
+                nr_pages += deferred_init_memmap_chunk(spfn, epfn, zone);
         }
 
         /*
-         * Initialize and free pages in MAX_PAGE_ORDER sized increments so
-         * that we can avoid introducing any issues with the buddy
-         * allocator.
+         * If there were no pages to initialize and free, the zone's memory
+         * map is already completely initialized.
          */
-        while (spfn < epfn) {
-                /* update our first deferred PFN for this section */
-                first_deferred_pfn = spfn;
-
-                nr_pages += deferred_init_maxorder(&i, zone, &spfn, &epfn);
-                touch_nmi_watchdog();
-
-                /* We should only stop along section boundaries */
-                if ((first_deferred_pfn ^ spfn) < PAGES_PER_SECTION)
-                        continue;
-
-                /* If our quota has been met we can stop here */
-                if (nr_pages >= nr_pages_needed)
-                        break;
-        }
+        pgdat->first_deferred_pfn = nr_pages ? spfn : ULONG_MAX;
 
-        pgdat->first_deferred_pfn = spfn;
         pgdat_resize_unlock(pgdat, &flags);
 
         return nr_pages > 0;
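
SECTION_ALIGN_UP(pfn) rounds a pfn up to the next section boundary (equivalent to ALIGN(pfn, PAGES_PER_SECTION)), so the loop above always works on whole sections starting from the section containing first_deferred_pfn. A userspace sketch of the section walk, with PAGES_PER_SECTION assumed to be 32768 (128 MB sections with 4 KB pages, as on x86_64) and every page pretended free purely for illustration:

/* Standalone sketch of the section-chunk walk in deferred_grow_zone(). */
#include <stdio.h>

#define PAGES_PER_SECTION 32768UL                       /* assumed value */
#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))
#define SECTION_ALIGN_UP(pfn) ALIGN(pfn, PAGES_PER_SECTION)

int main(void)
{
        unsigned long nr_pages_needed = SECTION_ALIGN_UP(1UL << 10);
        unsigned long first_deferred_pfn = 40000;       /* mid-section */
        unsigned long zone_end_pfn = 5 * PAGES_PER_SECTION;
        unsigned long spfn, epfn, nr_pages = 0;

        for (spfn = first_deferred_pfn, epfn = SECTION_ALIGN_UP(spfn + 1);
             nr_pages < nr_pages_needed && spfn < zone_end_pfn;
             spfn = epfn, epfn += PAGES_PER_SECTION) {
                printf("init section chunk [%lu, %lu)\n", spfn, epfn);
                nr_pages += epfn - spfn;        /* pretend all pages are free */
        }
        return 0;
}

The first pass covers only the tail of the starting section; if that yields fewer than nr_pages_needed pages (here it does), the loop continues into the next full section, matching the comment in the patch.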