@@ -184,7 +184,7 @@ static int ceph_releasepage(struct page *page, gfp_t gfp)
184184
185185static void ceph_netfs_expand_readahead (struct netfs_read_request * rreq )
186186{
187- struct inode * inode = rreq -> mapping -> host ;
187+ struct inode * inode = rreq -> inode ;
188188 struct ceph_inode_info * ci = ceph_inode (inode );
189189 struct ceph_file_layout * lo = & ci -> i_layout ;
190190 u32 blockoff ;
@@ -201,7 +201,7 @@ static void ceph_netfs_expand_readahead(struct netfs_read_request *rreq)
201201
202202static bool ceph_netfs_clamp_length (struct netfs_read_subrequest * subreq )
203203{
204- struct inode * inode = subreq -> rreq -> mapping -> host ;
204+ struct inode * inode = subreq -> rreq -> inode ;
205205 struct ceph_fs_client * fsc = ceph_inode_to_client (inode );
206206 struct ceph_inode_info * ci = ceph_inode (inode );
207207 u64 objno , objoff ;
@@ -244,10 +244,63 @@ static void finish_netfs_read(struct ceph_osd_request *req)
244244 iput (req -> r_inode );
245245}
246246
247+ static bool ceph_netfs_issue_op_inline (struct netfs_read_subrequest * subreq )
248+ {
249+ struct netfs_read_request * rreq = subreq -> rreq ;
250+ struct inode * inode = rreq -> inode ;
251+ struct ceph_mds_reply_info_parsed * rinfo ;
252+ struct ceph_mds_reply_info_in * iinfo ;
253+ struct ceph_mds_request * req ;
254+ struct ceph_mds_client * mdsc = ceph_sb_to_mdsc (inode -> i_sb );
255+ struct ceph_inode_info * ci = ceph_inode (inode );
256+ struct iov_iter iter ;
257+ ssize_t err = 0 ;
258+ size_t len ;
259+
260+ __set_bit (NETFS_SREQ_CLEAR_TAIL , & subreq -> flags );
261+ __clear_bit (NETFS_SREQ_WRITE_TO_CACHE , & subreq -> flags );
262+
263+ if (subreq -> start >= inode -> i_size )
264+ goto out ;
265+
266+ /* We need to fetch the inline data. */
267+ req = ceph_mdsc_create_request (mdsc , CEPH_MDS_OP_GETATTR , USE_ANY_MDS );
268+ if (IS_ERR (req )) {
269+ err = PTR_ERR (req );
270+ goto out ;
271+ }
272+ req -> r_ino1 = ci -> i_vino ;
273+ req -> r_args .getattr .mask = cpu_to_le32 (CEPH_STAT_CAP_INLINE_DATA );
274+ req -> r_num_caps = 2 ;
275+
276+ err = ceph_mdsc_do_request (mdsc , NULL , req );
277+ if (err < 0 )
278+ goto out ;
279+
280+ rinfo = & req -> r_reply_info ;
281+ iinfo = & rinfo -> targeti ;
282+ if (iinfo -> inline_version == CEPH_INLINE_NONE ) {
283+ /* The data got uninlined */
284+ ceph_mdsc_put_request (req );
285+ return false;
286+ }
287+
288+ len = min_t (size_t , iinfo -> inline_len - subreq -> start , subreq -> len );
289+ iov_iter_xarray (& iter , READ , & rreq -> mapping -> i_pages , subreq -> start , len );
290+ err = copy_to_iter (iinfo -> inline_data + subreq -> start , len , & iter );
291+ if (err == 0 )
292+ err = - EFAULT ;
293+
294+ ceph_mdsc_put_request (req );
295+ out :
296+ netfs_subreq_terminated (subreq , err , false);
297+ return true;
298+ }
299+
247300static void ceph_netfs_issue_op (struct netfs_read_subrequest * subreq )
248301{
249302 struct netfs_read_request * rreq = subreq -> rreq ;
250- struct inode * inode = rreq -> mapping -> host ;
303+ struct inode * inode = rreq -> inode ;
251304 struct ceph_inode_info * ci = ceph_inode (inode );
252305 struct ceph_fs_client * fsc = ceph_inode_to_client (inode );
253306 struct ceph_osd_request * req ;
@@ -258,6 +311,10 @@ static void ceph_netfs_issue_op(struct netfs_read_subrequest *subreq)
258311 int err = 0 ;
259312 u64 len = subreq -> len ;
260313
314+ if (ci -> i_inline_version != CEPH_INLINE_NONE &&
315+ ceph_netfs_issue_op_inline (subreq ))
316+ return ;
317+
261318 req = ceph_osdc_new_request (& fsc -> client -> osdc , & ci -> i_layout , vino , subreq -> start , & len ,
262319 0 , 1 , CEPH_OSD_OP_READ ,
263320 CEPH_OSD_FLAG_READ | fsc -> client -> osdc .client -> options -> read_from_replica ,
@@ -326,23 +383,9 @@ static int ceph_readpage(struct file *file, struct page *subpage)
326383 size_t len = folio_size (folio );
327384 u64 off = folio_file_pos (folio );
328385
329- if (ci -> i_inline_version != CEPH_INLINE_NONE ) {
330- /*
331- * Uptodate inline data should have been added
332- * into page cache while getting Fcr caps.
333- */
334- if (off == 0 ) {
335- folio_unlock (folio );
336- return - EINVAL ;
337- }
338- zero_user_segment (& folio -> page , 0 , folio_size (folio ));
339- folio_mark_uptodate (folio );
340- folio_unlock (folio );
341- return 0 ;
342- }
343-
344- dout ("readpage ino %llx.%llx file %p off %llu len %zu folio %p index %lu\n" ,
345- vino .ino , vino .snap , file , off , len , folio , folio_index (folio ));
386+ dout ("readpage ino %llx.%llx file %p off %llu len %zu folio %p index %lu\n inline %d" ,
387+ vino .ino , vino .snap , file , off , len , folio , folio_index (folio ),
388+ ci -> i_inline_version != CEPH_INLINE_NONE );
346389
347390 return netfs_readpage (file , folio , & ceph_netfs_read_ops , NULL );
348391}
@@ -1281,45 +1324,11 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
12811324 struct page * * pagep , void * * fsdata )
12821325{
12831326 struct inode * inode = file_inode (file );
1284- struct ceph_inode_info * ci = ceph_inode (inode );
12851327 struct folio * folio = NULL ;
1286- pgoff_t index = pos >> PAGE_SHIFT ;
12871328 int r ;
12881329
1289- /*
1290- * Uninlining should have already been done and everything updated, EXCEPT
1291- * for inline_version sent to the MDS.
1292- */
1293- if (ci -> i_inline_version != CEPH_INLINE_NONE ) {
1294- unsigned int fgp_flags = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE ;
1295- if (aop_flags & AOP_FLAG_NOFS )
1296- fgp_flags |= FGP_NOFS ;
1297- folio = __filemap_get_folio (mapping , index , fgp_flags ,
1298- mapping_gfp_mask (mapping ));
1299- if (!folio )
1300- return - ENOMEM ;
1301-
1302- /*
1303- * The inline_version on a new inode is set to 1. If that's the
1304- * case, then the folio is brand new and isn't yet Uptodate.
1305- */
1306- r = 0 ;
1307- if (index == 0 && ci -> i_inline_version != 1 ) {
1308- if (!folio_test_uptodate (folio )) {
1309- WARN_ONCE (1 , "ceph: write_begin called on still-inlined inode (inline_version %llu)!\n" ,
1310- ci -> i_inline_version );
1311- r = - EINVAL ;
1312- }
1313- goto out ;
1314- }
1315- zero_user_segment (& folio -> page , 0 , folio_size (folio ));
1316- folio_mark_uptodate (folio );
1317- goto out ;
1318- }
1319-
13201330 r = netfs_write_begin (file , inode -> i_mapping , pos , len , 0 , & folio , NULL ,
13211331 & ceph_netfs_read_ops , NULL );
1322- out :
13231332 if (r == 0 )
13241333 folio_wait_fscache (folio );
13251334 if (r < 0 ) {
@@ -1515,19 +1524,6 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf)
15151524 sb_start_pagefault (inode -> i_sb );
15161525 ceph_block_sigs (& oldset );
15171526
1518- if (ci -> i_inline_version != CEPH_INLINE_NONE ) {
1519- struct page * locked_page = NULL ;
1520- if (off == 0 ) {
1521- lock_page (page );
1522- locked_page = page ;
1523- }
1524- err = ceph_uninline_data (vma -> vm_file , locked_page );
1525- if (locked_page )
1526- unlock_page (locked_page );
1527- if (err < 0 )
1528- goto out_free ;
1529- }
1530-
15311527 if (off + thp_size (page ) <= size )
15321528 len = thp_size (page );
15331529 else
@@ -1584,11 +1580,9 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf)
15841580 ceph_put_snap_context (snapc );
15851581 } while (err == 0 );
15861582
1587- if (ret == VM_FAULT_LOCKED ||
1588- ci -> i_inline_version != CEPH_INLINE_NONE ) {
1583+ if (ret == VM_FAULT_LOCKED ) {
15891584 int dirty ;
15901585 spin_lock (& ci -> i_ceph_lock );
1591- ci -> i_inline_version = CEPH_INLINE_NONE ;
15921586 dirty = __ceph_mark_dirty_caps (ci , CEPH_CAP_FILE_WR ,
15931587 & prealloc_cf );
15941588 spin_unlock (& ci -> i_ceph_lock );
@@ -1652,16 +1646,30 @@ void ceph_fill_inline_data(struct inode *inode, struct page *locked_page,
16521646 }
16531647}
16541648
1655- int ceph_uninline_data (struct file * filp , struct page * locked_page )
1649+ int ceph_uninline_data (struct file * file )
16561650{
1657- struct inode * inode = file_inode (filp );
1651+ struct inode * inode = file_inode (file );
16581652 struct ceph_inode_info * ci = ceph_inode (inode );
16591653 struct ceph_fs_client * fsc = ceph_inode_to_client (inode );
16601654 struct ceph_osd_request * req ;
1661- struct page * page = NULL ;
1662- u64 len , inline_version ;
1655+ struct ceph_cap_flush * prealloc_cf ;
1656+ struct folio * folio = NULL ;
1657+ u64 inline_version = CEPH_INLINE_NONE ;
1658+ struct page * pages [1 ];
16631659 int err = 0 ;
1664- bool from_pagecache = false;
1660+ u64 len ;
1661+
1662+ prealloc_cf = ceph_alloc_cap_flush ();
1663+ if (!prealloc_cf )
1664+ return - ENOMEM ;
1665+
1666+ folio = read_mapping_folio (inode -> i_mapping , 0 , file );
1667+ if (IS_ERR (folio )) {
1668+ err = PTR_ERR (folio );
1669+ goto out ;
1670+ }
1671+
1672+ folio_lock (folio );
16651673
16661674 spin_lock (& ci -> i_ceph_lock );
16671675 inline_version = ci -> i_inline_version ;
@@ -1672,53 +1680,19 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
16721680
16731681 if (inline_version == 1 || /* initial version, no data */
16741682 inline_version == CEPH_INLINE_NONE )
1675- goto out ;
1676-
1677- if (locked_page ) {
1678- page = locked_page ;
1679- WARN_ON (!PageUptodate (page ));
1680- } else if (ceph_caps_issued (ci ) &
1681- (CEPH_CAP_FILE_CACHE |CEPH_CAP_FILE_LAZYIO )) {
1682- page = find_get_page (inode -> i_mapping , 0 );
1683- if (page ) {
1684- if (PageUptodate (page )) {
1685- from_pagecache = true;
1686- lock_page (page );
1687- } else {
1688- put_page (page );
1689- page = NULL ;
1690- }
1691- }
1692- }
1683+ goto out_unlock ;
16931684
1694- if (page ) {
1695- len = i_size_read (inode );
1696- if (len > PAGE_SIZE )
1697- len = PAGE_SIZE ;
1698- } else {
1699- page = __page_cache_alloc (GFP_NOFS );
1700- if (!page ) {
1701- err = - ENOMEM ;
1702- goto out ;
1703- }
1704- err = __ceph_do_getattr (inode , page ,
1705- CEPH_STAT_CAP_INLINE_DATA , true);
1706- if (err < 0 ) {
1707- /* no inline data */
1708- if (err == - ENODATA )
1709- err = 0 ;
1710- goto out ;
1711- }
1712- len = err ;
1713- }
1685+ len = i_size_read (inode );
1686+ if (len > folio_size (folio ))
1687+ len = folio_size (folio );
17141688
17151689 req = ceph_osdc_new_request (& fsc -> client -> osdc , & ci -> i_layout ,
17161690 ceph_vino (inode ), 0 , & len , 0 , 1 ,
17171691 CEPH_OSD_OP_CREATE , CEPH_OSD_FLAG_WRITE ,
17181692 NULL , 0 , 0 , false);
17191693 if (IS_ERR (req )) {
17201694 err = PTR_ERR (req );
1721- goto out ;
1695+ goto out_unlock ;
17221696 }
17231697
17241698 req -> r_mtime = inode -> i_mtime ;
@@ -1727,7 +1701,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
17271701 err = ceph_osdc_wait_request (& fsc -> client -> osdc , req );
17281702 ceph_osdc_put_request (req );
17291703 if (err < 0 )
1730- goto out ;
1704+ goto out_unlock ;
17311705
17321706 req = ceph_osdc_new_request (& fsc -> client -> osdc , & ci -> i_layout ,
17331707 ceph_vino (inode ), 0 , & len , 1 , 3 ,
@@ -1736,10 +1710,11 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
17361710 ci -> i_truncate_size , false);
17371711 if (IS_ERR (req )) {
17381712 err = PTR_ERR (req );
1739- goto out ;
1713+ goto out_unlock ;
17401714 }
17411715
1742- osd_req_op_extent_osd_data_pages (req , 1 , & page , len , 0 , false, false);
1716+ pages [0 ] = folio_page (folio , 0 );
1717+ osd_req_op_extent_osd_data_pages (req , 1 , pages , len , 0 , false, false);
17431718
17441719 {
17451720 __le64 xattr_buf = cpu_to_le64 (inline_version );
@@ -1749,7 +1724,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
17491724 CEPH_OSD_CMPXATTR_OP_GT ,
17501725 CEPH_OSD_CMPXATTR_MODE_U64 );
17511726 if (err )
1752- goto out_put ;
1727+ goto out_put_req ;
17531728 }
17541729
17551730 {
@@ -1760,7 +1735,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
17601735 "inline_version" ,
17611736 xattr_buf , xattr_len , 0 , 0 );
17621737 if (err )
1763- goto out_put ;
1738+ goto out_put_req ;
17641739 }
17651740
17661741 req -> r_mtime = inode -> i_mtime ;
@@ -1771,19 +1746,28 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
17711746 ceph_update_write_metrics (& fsc -> mdsc -> metric , req -> r_start_latency ,
17721747 req -> r_end_latency , len , err );
17731748
1774- out_put :
1749+ if (!err ) {
1750+ int dirty ;
1751+
1752+ /* Set to CAP_INLINE_NONE and dirty the caps */
1753+ down_read (& fsc -> mdsc -> snap_rwsem );
1754+ spin_lock (& ci -> i_ceph_lock );
1755+ ci -> i_inline_version = CEPH_INLINE_NONE ;
1756+ dirty = __ceph_mark_dirty_caps (ci , CEPH_CAP_FILE_WR , & prealloc_cf );
1757+ spin_unlock (& ci -> i_ceph_lock );
1758+ up_read (& fsc -> mdsc -> snap_rwsem );
1759+ if (dirty )
1760+ __mark_inode_dirty (inode , dirty );
1761+ }
1762+ out_put_req :
17751763 ceph_osdc_put_request (req );
17761764 if (err == - ECANCELED )
17771765 err = 0 ;
1766+ out_unlock :
1767+ folio_unlock (folio );
1768+ folio_put (folio );
17781769out :
1779- if (page && page != locked_page ) {
1780- if (from_pagecache ) {
1781- unlock_page (page );
1782- put_page (page );
1783- } else
1784- __free_pages (page , 0 );
1785- }
1786-
1770+ ceph_free_cap_flush (prealloc_cf );
17871771 dout ("uninline_data %p %llx.%llx inline_version %llu = %d\n" ,
17881772 inode , ceph_vinop (inode ), inline_version , err );
17891773 return err ;
0 commit comments