@@ -102,6 +102,35 @@ struct afs_lookup_cookie {
102102 struct afs_fid fids [50 ];
103103};
104104
105+ /*
106+ * Drop the refs that we're holding on the pages we were reading into. We've
107+ * got refs on the first nr_pages pages.
108+ */
109+ static void afs_dir_read_cleanup (struct afs_read * req )
110+ {
111+ struct address_space * mapping = req -> vnode -> vfs_inode .i_mapping ;
112+ struct page * page ;
113+ pgoff_t last = req -> nr_pages - 1 ;
114+
115+ XA_STATE (xas , & mapping -> i_pages , 0 );
116+
117+ if (unlikely (!req -> nr_pages ))
118+ return ;
119+
120+ rcu_read_lock ();
121+ xas_for_each (& xas , page , last ) {
122+ if (xas_retry (& xas , page ))
123+ continue ;
124+ BUG_ON (xa_is_value (page ));
125+ BUG_ON (PageCompound (page ));
126+ ASSERTCMP (page -> mapping , = = , mapping );
127+
128+ put_page (page );
129+ }
130+
131+ rcu_read_unlock ();
132+ }
133+
105134/*
106135 * check that a directory page is valid
107136 */
@@ -127,7 +156,7 @@ static bool afs_dir_check_page(struct afs_vnode *dvnode, struct page *page,
127156 qty /= sizeof (union afs_xdr_dir_block );
128157
129158 /* check them */
130- dbuf = kmap (page );
159+ dbuf = kmap_atomic (page );
131160 for (tmp = 0 ; tmp < qty ; tmp ++ ) {
132161 if (dbuf -> blocks [tmp ].hdr .magic != AFS_DIR_MAGIC ) {
133162 printk ("kAFS: %s(%lx): bad magic %d/%d is %04hx\n" ,
@@ -146,7 +175,7 @@ static bool afs_dir_check_page(struct afs_vnode *dvnode, struct page *page,
146175 ((u8 * )& dbuf -> blocks [tmp ])[AFS_DIR_BLOCK_SIZE - 1 ] = 0 ;
147176 }
148177
149- kunmap ( page );
178+ kunmap_atomic ( dbuf );
150179
151180checked :
152181 afs_stat_v (dvnode , n_read_dir );
@@ -157,35 +186,74 @@ static bool afs_dir_check_page(struct afs_vnode *dvnode, struct page *page,
157186}
158187
159188/*
160- * Check the contents of a directory that we've just read .
189+ * Dump the contents of a directory.
161190 */
162- static bool afs_dir_check_pages (struct afs_vnode * dvnode , struct afs_read * req )
191+ static void afs_dir_dump (struct afs_vnode * dvnode , struct afs_read * req )
163192{
164193 struct afs_xdr_dir_page * dbuf ;
165- unsigned int i , j , qty = PAGE_SIZE / sizeof (union afs_xdr_dir_block );
194+ struct address_space * mapping = dvnode -> vfs_inode .i_mapping ;
195+ struct page * page ;
196+ unsigned int i , qty = PAGE_SIZE / sizeof (union afs_xdr_dir_block );
197+ pgoff_t last = req -> nr_pages - 1 ;
166198
167- for (i = 0 ; i < req -> nr_pages ; i ++ )
168- if (!afs_dir_check_page (dvnode , req -> pages [i ], req -> actual_len ))
169- goto bad ;
170- return true;
199+ XA_STATE (xas , & mapping -> i_pages , 0 );
171200
172- bad :
173- pr_warn ("DIR %llx:%llx f=%llx l=%llx al=%llx r=%llx\n" ,
201+ pr_warn ("DIR %llx:%llx f=%llx l=%llx al=%llx\n" ,
174202 dvnode -> fid .vid , dvnode -> fid .vnode ,
175- req -> file_size , req -> len , req -> actual_len , req -> remain );
176- pr_warn ("DIR %llx %x %x %x\n" ,
177- req -> pos , req -> index , req -> nr_pages , req -> offset );
203+ req -> file_size , req -> len , req -> actual_len );
204+ pr_warn ("DIR %llx %x %zx %zx\n" ,
205+ req -> pos , req -> nr_pages ,
206+ req -> iter -> iov_offset , iov_iter_count (req -> iter ));
178207
179- for (i = 0 ; i < req -> nr_pages ; i ++ ) {
180- dbuf = kmap (req -> pages [i ]);
181- for (j = 0 ; j < qty ; j ++ ) {
182- union afs_xdr_dir_block * block = & dbuf -> blocks [j ];
208+ xas_for_each (& xas , page , last ) {
209+ if (xas_retry (& xas , page ))
210+ continue ;
211+
212+ BUG_ON (PageCompound (page ));
213+ BUG_ON (page -> mapping != mapping );
214+
215+ dbuf = kmap_atomic (page );
216+ for (i = 0 ; i < qty ; i ++ ) {
217+ union afs_xdr_dir_block * block = & dbuf -> blocks [i ];
183218
184- pr_warn ("[%02x ] %32phN\n" , i * qty + j , block );
219+ pr_warn ("[%02lx ] %32phN\n" , page -> index * qty + i , block );
185220 }
186- kunmap ( req -> pages [ i ] );
221+ kunmap_atomic ( dbuf );
187222 }
188- return false;
223+ }
224+
225+ /*
226+ * Check all the pages in a directory. All the pages are held pinned.
227+ */
228+ static int afs_dir_check (struct afs_vnode * dvnode , struct afs_read * req )
229+ {
230+ struct address_space * mapping = dvnode -> vfs_inode .i_mapping ;
231+ struct page * page ;
232+ pgoff_t last = req -> nr_pages - 1 ;
233+ int ret = 0 ;
234+
235+ XA_STATE (xas , & mapping -> i_pages , 0 );
236+
237+ if (unlikely (!req -> nr_pages ))
238+ return 0 ;
239+
240+ rcu_read_lock ();
241+ xas_for_each (& xas , page , last ) {
242+ if (xas_retry (& xas , page ))
243+ continue ;
244+
245+ BUG_ON (PageCompound (page ));
246+ BUG_ON (page -> mapping != mapping );
247+
248+ if (!afs_dir_check_page (dvnode , page , req -> file_size )) {
249+ afs_dir_dump (dvnode , req );
250+ ret = - EIO ;
251+ break ;
252+ }
253+ }
254+
255+ rcu_read_unlock ();
256+ return ret ;
189257}
190258
191259/*
@@ -214,81 +282,82 @@ static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
214282{
215283 struct afs_read * req ;
216284 loff_t i_size ;
217- int nr_pages , nr_inline , i , n ;
218- int ret = - ENOMEM ;
285+ int nr_pages , i , n ;
286+ int ret ;
287+
288+ _enter ("" );
219289
220- retry :
290+ req = kzalloc (sizeof (* req ), GFP_KERNEL );
291+ if (!req )
292+ return ERR_PTR (- ENOMEM );
293+
294+ refcount_set (& req -> usage , 1 );
295+ req -> vnode = dvnode ;
296+ req -> key = key_get (key );
297+ req -> cleanup = afs_dir_read_cleanup ;
298+
299+ expand :
221300 i_size = i_size_read (& dvnode -> vfs_inode );
222- if (i_size < 2048 )
223- return ERR_PTR (afs_bad (dvnode , afs_file_error_dir_small ));
301+ if (i_size < 2048 ) {
302+ ret = afs_bad (dvnode , afs_file_error_dir_small );
303+ goto error ;
304+ }
224305 if (i_size > 2048 * 1024 ) {
225306 trace_afs_file_error (dvnode , - EFBIG , afs_file_error_dir_big );
226- return ERR_PTR (- EFBIG );
307+ ret = - EFBIG ;
308+ goto error ;
227309 }
228310
229311 _enter ("%llu" , i_size );
230312
231- /* Get a request record to hold the page list. We want to hold it
232- * inline if we can, but we don't want to make an order 1 allocation.
233- */
234313 nr_pages = (i_size + PAGE_SIZE - 1 ) / PAGE_SIZE ;
235- nr_inline = nr_pages ;
236- if (nr_inline > (PAGE_SIZE - sizeof (* req )) / sizeof (struct page * ))
237- nr_inline = 0 ;
238314
239- req = kzalloc (struct_size (req , array , nr_inline ), GFP_KERNEL );
240- if (!req )
241- return ERR_PTR (- ENOMEM );
242-
243- refcount_set (& req -> usage , 1 );
244- req -> key = key_get (key );
245- req -> nr_pages = nr_pages ;
246315 req -> actual_len = i_size ; /* May change */
247316 req -> len = nr_pages * PAGE_SIZE ; /* We can ask for more than there is */
248317 req -> data_version = dvnode -> status .data_version ; /* May change */
249- if (nr_inline > 0 ) {
250- req -> pages = req -> array ;
251- } else {
252- req -> pages = kcalloc (nr_pages , sizeof (struct page * ),
253- GFP_KERNEL );
254- if (!req -> pages )
255- goto error ;
256- }
318+ iov_iter_xarray (& req -> def_iter , READ , & dvnode -> vfs_inode .i_mapping -> i_pages ,
319+ 0 , i_size );
320+ req -> iter = & req -> def_iter ;
257321
258- /* Get a list of all the pages that hold or will hold the directory
259- * content. We need to fill in any gaps that we might find where the
260- * memory reclaimer has been at work. If there are any gaps, we will
322+ /* Fill in any gaps that we might find where the memory reclaimer has
323+ * been at work and pin all the pages. If there are any gaps, we will
261324 * need to reread the entire directory contents.
262325 */
263- i = 0 ;
264- do {
326+ i = req -> nr_pages ;
327+ while (i < nr_pages ) {
328+ struct page * pages [8 ], * page ;
329+
265330 n = find_get_pages_contig (dvnode -> vfs_inode .i_mapping , i ,
266- req -> nr_pages - i ,
267- req -> pages + i );
268- _debug ("find %u at %u/%u" , n , i , req -> nr_pages );
331+ min_t (unsigned int , nr_pages - i ,
332+ ARRAY_SIZE (pages )),
333+ pages );
334+ _debug ("find %u at %u/%u" , n , i , nr_pages );
335+
269336 if (n == 0 ) {
270337 gfp_t gfp = dvnode -> vfs_inode .i_mapping -> gfp_mask ;
271338
272339 if (test_and_clear_bit (AFS_VNODE_DIR_VALID , & dvnode -> flags ))
273340 afs_stat_v (dvnode , n_inval );
274341
275342 ret = - ENOMEM ;
276- req -> pages [ i ] = __page_cache_alloc (gfp );
277- if (!req -> pages [ i ] )
343+ page = __page_cache_alloc (gfp );
344+ if (!page )
278345 goto error ;
279- ret = add_to_page_cache_lru (req -> pages [ i ] ,
346+ ret = add_to_page_cache_lru (page ,
280347 dvnode -> vfs_inode .i_mapping ,
281348 i , gfp );
282349 if (ret < 0 )
283350 goto error ;
284351
285- attach_page_private (req -> pages [i ], (void * )1 );
286- unlock_page (req -> pages [i ]);
352+ attach_page_private (page , (void * )1 );
353+ unlock_page (page );
354+ req -> nr_pages ++ ;
287355 i ++ ;
288356 } else {
357+ req -> nr_pages += n ;
289358 i += n ;
290359 }
291- } while ( i < req -> nr_pages );
360+ }
292361
293362 /* If we're going to reload, we need to lock all the pages to prevent
294363 * races.
@@ -312,12 +381,17 @@ static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
312381
313382 task_io_account_read (PAGE_SIZE * req -> nr_pages );
314383
315- if (req -> len < req -> file_size )
316- goto content_has_grown ;
384+ if (req -> len < req -> file_size ) {
385+ /* The content has grown, so we need to expand the
386+ * buffer.
387+ */
388+ up_write (& dvnode -> validate_lock );
389+ goto expand ;
390+ }
317391
318392 /* Validate the data we just read. */
319- ret = - EIO ;
320- if (! afs_dir_check_pages ( dvnode , req ) )
393+ ret = afs_dir_check ( dvnode , req ) ;
394+ if (ret < 0 )
321395 goto error_unlock ;
322396
323397 // TODO: Trim excess pages
@@ -335,11 +409,6 @@ static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
335409 afs_put_read (req );
336410 _leave (" = %d" , ret );
337411 return ERR_PTR (ret );
338-
339- content_has_grown :
340- up_write (& dvnode -> validate_lock );
341- afs_put_read (req );
342- goto retry ;
343412}
344413
345414/*
@@ -449,6 +518,7 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
449518 struct afs_read * req ;
450519 struct page * page ;
451520 unsigned blkoff , limit ;
521+ void __rcu * * slot ;
452522 int ret ;
453523
454524 _enter ("{%lu},%u,," , dir -> i_ino , (unsigned )ctx -> pos );
@@ -473,9 +543,15 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
473543 blkoff = ctx -> pos & ~(sizeof (union afs_xdr_dir_block ) - 1 );
474544
475545 /* Fetch the appropriate page from the directory and re-add it
476- * to the LRU.
546+ * to the LRU. We have all the pages pinned with an extra ref.
477547 */
478- page = req -> pages [blkoff / PAGE_SIZE ];
548+ rcu_read_lock ();
549+ page = NULL ;
550+ slot = radix_tree_lookup_slot (& dvnode -> vfs_inode .i_mapping -> i_pages ,
551+ blkoff / PAGE_SIZE );
552+ if (slot )
553+ page = radix_tree_deref_slot (slot );
554+ rcu_read_unlock ();
479555 if (!page ) {
480556 ret = afs_bad (dvnode , afs_file_error_dir_missing_page );
481557 break ;
0 commit comments