Skip to content

Commit 3c4aa44

Browse files
committed
Merge tag 'ceph-for-6.4-rc1' of https://github.com/ceph/ceph-client
Pull ceph updates from Ilya Dryomov:
 "A few filesystem improvements, with a rather nasty use-after-free fix
  from Xiubo intended for stable"

* tag 'ceph-for-6.4-rc1' of https://github.com/ceph/ceph-client:
  - ceph: reorder fields in 'struct ceph_snapid_map'
  - ceph: pass ino# instead of old_dentry if it's disconnected
  - ceph: fix potential use-after-free bug when trimming caps
  - ceph: implement writeback livelock avoidance using page tagging
  - ceph: do not print the whole xattr value if it's too long
2 parents 8e15605 + db2993a commit 3c4aa44

8 files changed

Lines changed: 102 additions & 47 deletions

File tree

fs/ceph/addr.c

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -808,6 +808,7 @@ static int ceph_writepages_start(struct address_space *mapping,
808808
bool should_loop, range_whole = false;
809809
bool done = false;
810810
bool caching = ceph_is_cache_enabled(inode);
811+
xa_mark_t tag;
811812

812813
if (wbc->sync_mode == WB_SYNC_NONE &&
813814
fsc->write_congested)
@@ -834,6 +835,11 @@ static int ceph_writepages_start(struct address_space *mapping,
834835
start_index = wbc->range_cyclic ? mapping->writeback_index : 0;
835836
index = start_index;
836837

838+
if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) {
839+
tag = PAGECACHE_TAG_TOWRITE;
840+
} else {
841+
tag = PAGECACHE_TAG_DIRTY;
842+
}
837843
retry:
838844
/* find oldest snap context with dirty data */
839845
snapc = get_oldest_context(inode, &ceph_wbc, NULL);
@@ -872,6 +878,9 @@ static int ceph_writepages_start(struct address_space *mapping,
872878
dout(" non-head snapc, range whole\n");
873879
}
874880

881+
if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
882+
tag_pages_for_writeback(mapping, index, end);
883+
875884
ceph_put_snap_context(last_snapc);
876885
last_snapc = snapc;
877886

@@ -888,7 +897,7 @@ static int ceph_writepages_start(struct address_space *mapping,
888897

889898
get_more_pages:
890899
nr_folios = filemap_get_folios_tag(mapping, &index,
891-
end, PAGECACHE_TAG_DIRTY, &fbatch);
900+
end, tag, &fbatch);
892901
dout("pagevec_lookup_range_tag got %d\n", nr_folios);
893902
if (!nr_folios && !locked_pages)
894903
break;

fs/ceph/caps.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -431,7 +431,7 @@ void ceph_reservation_status(struct ceph_fs_client *fsc,
431431
*
432432
* Called with i_ceph_lock held.
433433
*/
434-
static struct ceph_cap *__get_cap_for_mds(struct ceph_inode_info *ci, int mds)
434+
struct ceph_cap *__get_cap_for_mds(struct ceph_inode_info *ci, int mds)
435435
{
436436
struct ceph_cap *cap;
437437
struct rb_node *n = ci->i_caps.rb_node;

fs/ceph/debugfs.c

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -248,14 +248,20 @@ static int metrics_caps_show(struct seq_file *s, void *p)
248248
return 0;
249249
}
250250

251-
static int caps_show_cb(struct inode *inode, struct ceph_cap *cap, void *p)
251+
static int caps_show_cb(struct inode *inode, int mds, void *p)
252252
{
253+
struct ceph_inode_info *ci = ceph_inode(inode);
253254
struct seq_file *s = p;
254-
255-
seq_printf(s, "0x%-17llx%-3d%-17s%-17s\n", ceph_ino(inode),
256-
cap->session->s_mds,
257-
ceph_cap_string(cap->issued),
258-
ceph_cap_string(cap->implemented));
255+
struct ceph_cap *cap;
256+
257+
spin_lock(&ci->i_ceph_lock);
258+
cap = __get_cap_for_mds(ci, mds);
259+
if (cap)
260+
seq_printf(s, "0x%-17llx%-3d%-17s%-17s\n", ceph_ino(inode),
261+
cap->session->s_mds,
262+
ceph_cap_string(cap->issued),
263+
ceph_cap_string(cap->implemented));
264+
spin_unlock(&ci->i_ceph_lock);
259265
return 0;
260266
}
261267

fs/ceph/dir.c

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1050,15 +1050,18 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir,
10501050
struct ceph_mds_request *req;
10511051
int err;
10521052

1053+
if (dentry->d_flags & DCACHE_DISCONNECTED)
1054+
return -EINVAL;
1055+
10531056
err = ceph_wait_on_conflict_unlink(dentry);
10541057
if (err)
10551058
return err;
10561059

10571060
if (ceph_snap(dir) != CEPH_NOSNAP)
10581061
return -EROFS;
10591062

1060-
dout("link in dir %p old_dentry %p dentry %p\n", dir,
1061-
old_dentry, dentry);
1063+
dout("link in dir %p %llx.%llx old_dentry %p:'%pd' dentry %p:'%pd'\n",
1064+
dir, ceph_vinop(dir), old_dentry, old_dentry, dentry, dentry);
10621065
req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LINK, USE_AUTH_MDS);
10631066
if (IS_ERR(req)) {
10641067
d_drop(dentry);
@@ -1067,6 +1070,12 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir,
10671070
req->r_dentry = dget(dentry);
10681071
req->r_num_caps = 2;
10691072
req->r_old_dentry = dget(old_dentry);
1073+
/*
1074+
* The old_dentry maybe a DCACHE_DISCONNECTED dentry, then we
1075+
* will just pass the ino# to MDSs.
1076+
*/
1077+
if (old_dentry->d_flags & DCACHE_DISCONNECTED)
1078+
req->r_ino2 = ceph_vino(d_inode(old_dentry));
10701079
req->r_parent = dir;
10711080
ihold(dir);
10721081
set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);

fs/ceph/mds_client.c

Lines changed: 51 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1632,8 +1632,8 @@ static void cleanup_session_requests(struct ceph_mds_client *mdsc,
16321632
* Caller must hold session s_mutex.
16331633
*/
16341634
int ceph_iterate_session_caps(struct ceph_mds_session *session,
1635-
int (*cb)(struct inode *, struct ceph_cap *,
1636-
void *), void *arg)
1635+
int (*cb)(struct inode *, int mds, void *),
1636+
void *arg)
16371637
{
16381638
struct list_head *p;
16391639
struct ceph_cap *cap;
@@ -1645,13 +1645,16 @@ int ceph_iterate_session_caps(struct ceph_mds_session *session,
16451645
spin_lock(&session->s_cap_lock);
16461646
p = session->s_caps.next;
16471647
while (p != &session->s_caps) {
1648+
int mds;
1649+
16481650
cap = list_entry(p, struct ceph_cap, session_caps);
16491651
inode = igrab(&cap->ci->netfs.inode);
16501652
if (!inode) {
16511653
p = p->next;
16521654
continue;
16531655
}
16541656
session->s_cap_iterator = cap;
1657+
mds = cap->mds;
16551658
spin_unlock(&session->s_cap_lock);
16561659

16571660
if (last_inode) {
@@ -1663,7 +1666,7 @@ int ceph_iterate_session_caps(struct ceph_mds_session *session,
16631666
old_cap = NULL;
16641667
}
16651668

1666-
ret = cb(inode, cap, arg);
1669+
ret = cb(inode, mds, arg);
16671670
last_inode = inode;
16681671

16691672
spin_lock(&session->s_cap_lock);
@@ -1696,20 +1699,25 @@ int ceph_iterate_session_caps(struct ceph_mds_session *session,
16961699
return ret;
16971700
}
16981701

1699-
static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
1700-
void *arg)
1702+
static int remove_session_caps_cb(struct inode *inode, int mds, void *arg)
17011703
{
17021704
struct ceph_inode_info *ci = ceph_inode(inode);
17031705
bool invalidate = false;
1704-
int iputs;
1706+
struct ceph_cap *cap;
1707+
int iputs = 0;
17051708

1706-
dout("removing cap %p, ci is %p, inode is %p\n",
1707-
cap, ci, &ci->netfs.inode);
17081709
spin_lock(&ci->i_ceph_lock);
1709-
iputs = ceph_purge_inode_cap(inode, cap, &invalidate);
1710+
cap = __get_cap_for_mds(ci, mds);
1711+
if (cap) {
1712+
dout(" removing cap %p, ci is %p, inode is %p\n",
1713+
cap, ci, &ci->netfs.inode);
1714+
1715+
iputs = ceph_purge_inode_cap(inode, cap, &invalidate);
1716+
}
17101717
spin_unlock(&ci->i_ceph_lock);
17111718

1712-
wake_up_all(&ci->i_cap_wq);
1719+
if (cap)
1720+
wake_up_all(&ci->i_cap_wq);
17131721
if (invalidate)
17141722
ceph_queue_invalidate(inode);
17151723
while (iputs--)
@@ -1780,8 +1788,7 @@ enum {
17801788
*
17811789
* caller must hold s_mutex.
17821790
*/
1783-
static int wake_up_session_cb(struct inode *inode, struct ceph_cap *cap,
1784-
void *arg)
1791+
static int wake_up_session_cb(struct inode *inode, int mds, void *arg)
17851792
{
17861793
struct ceph_inode_info *ci = ceph_inode(inode);
17871794
unsigned long ev = (unsigned long)arg;
@@ -1792,12 +1799,14 @@ static int wake_up_session_cb(struct inode *inode, struct ceph_cap *cap,
17921799
ci->i_requested_max_size = 0;
17931800
spin_unlock(&ci->i_ceph_lock);
17941801
} else if (ev == RENEWCAPS) {
1795-
if (cap->cap_gen < atomic_read(&cap->session->s_cap_gen)) {
1796-
/* mds did not re-issue stale cap */
1797-
spin_lock(&ci->i_ceph_lock);
1802+
struct ceph_cap *cap;
1803+
1804+
spin_lock(&ci->i_ceph_lock);
1805+
cap = __get_cap_for_mds(ci, mds);
1806+
/* mds did not re-issue stale cap */
1807+
if (cap && cap->cap_gen < atomic_read(&cap->session->s_cap_gen))
17981808
cap->issued = cap->implemented = CEPH_CAP_PIN;
1799-
spin_unlock(&ci->i_ceph_lock);
1800-
}
1809+
spin_unlock(&ci->i_ceph_lock);
18011810
} else if (ev == FORCE_RO) {
18021811
}
18031812
wake_up_all(&ci->i_cap_wq);
@@ -1959,16 +1968,22 @@ static bool drop_negative_children(struct dentry *dentry)
19591968
* Yes, this is a bit sloppy. Our only real goal here is to respond to
19601969
* memory pressure from the MDS, though, so it needn't be perfect.
19611970
*/
1962-
static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
1971+
static int trim_caps_cb(struct inode *inode, int mds, void *arg)
19631972
{
19641973
int *remaining = arg;
19651974
struct ceph_inode_info *ci = ceph_inode(inode);
19661975
int used, wanted, oissued, mine;
1976+
struct ceph_cap *cap;
19671977

19681978
if (*remaining <= 0)
19691979
return -1;
19701980

19711981
spin_lock(&ci->i_ceph_lock);
1982+
cap = __get_cap_for_mds(ci, mds);
1983+
if (!cap) {
1984+
spin_unlock(&ci->i_ceph_lock);
1985+
return 0;
1986+
}
19721987
mine = cap->issued | cap->implemented;
19731988
used = __ceph_caps_used(ci);
19741989
wanted = __ceph_caps_file_wanted(ci);
@@ -2555,6 +2570,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
25552570
u64 ino1 = 0, ino2 = 0;
25562571
int pathlen1 = 0, pathlen2 = 0;
25572572
bool freepath1 = false, freepath2 = false;
2573+
struct dentry *old_dentry = NULL;
25582574
int len;
25592575
u16 releases;
25602576
void *p, *end;
@@ -2572,7 +2588,10 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
25722588
}
25732589

25742590
/* If r_old_dentry is set, then assume that its parent is locked */
2575-
ret = set_request_path_attr(NULL, req->r_old_dentry,
2591+
if (req->r_old_dentry &&
2592+
!(req->r_old_dentry->d_flags & DCACHE_DISCONNECTED))
2593+
old_dentry = req->r_old_dentry;
2594+
ret = set_request_path_attr(NULL, old_dentry,
25762595
req->r_old_dentry_dir,
25772596
req->r_path2, req->r_ino2.ino,
25782597
&path2, &pathlen2, &ino2, &freepath2, true);
@@ -3911,26 +3930,22 @@ static struct dentry* d_find_primary(struct inode *inode)
39113930
/*
39123931
* Encode information about a cap for a reconnect with the MDS.
39133932
*/
3914-
static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap,
3915-
void *arg)
3933+
static int reconnect_caps_cb(struct inode *inode, int mds, void *arg)
39163934
{
39173935
union {
39183936
struct ceph_mds_cap_reconnect v2;
39193937
struct ceph_mds_cap_reconnect_v1 v1;
39203938
} rec;
3921-
struct ceph_inode_info *ci = cap->ci;
3939+
struct ceph_inode_info *ci = ceph_inode(inode);
39223940
struct ceph_reconnect_state *recon_state = arg;
39233941
struct ceph_pagelist *pagelist = recon_state->pagelist;
39243942
struct dentry *dentry;
3943+
struct ceph_cap *cap;
39253944
char *path;
3926-
int pathlen = 0, err;
3945+
int pathlen = 0, err = 0;
39273946
u64 pathbase;
39283947
u64 snap_follows;
39293948

3930-
dout(" adding %p ino %llx.%llx cap %p %lld %s\n",
3931-
inode, ceph_vinop(inode), cap, cap->cap_id,
3932-
ceph_cap_string(cap->issued));
3933-
39343949
dentry = d_find_primary(inode);
39353950
if (dentry) {
39363951
/* set pathbase to parent dir when msg_version >= 2 */
@@ -3947,6 +3962,15 @@ static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap,
39473962
}
39483963

39493964
spin_lock(&ci->i_ceph_lock);
3965+
cap = __get_cap_for_mds(ci, mds);
3966+
if (!cap) {
3967+
spin_unlock(&ci->i_ceph_lock);
3968+
goto out_err;
3969+
}
3970+
dout(" adding %p ino %llx.%llx cap %p %lld %s\n",
3971+
inode, ceph_vinop(inode), cap, cap->cap_id,
3972+
ceph_cap_string(cap->issued));
3973+
39503974
cap->seq = 0; /* reset cap seq */
39513975
cap->issue_seq = 0; /* and issue_seq */
39523976
cap->mseq = 0; /* and migrate_seq */

fs/ceph/mds_client.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -355,8 +355,8 @@ struct ceph_snapid_map {
355355
struct rb_node node;
356356
struct list_head lru;
357357
atomic_t ref;
358-
u64 snap;
359358
dev_t dev;
359+
u64 snap;
360360
unsigned long last_used;
361361
};
362362

@@ -541,8 +541,7 @@ extern void ceph_flush_cap_releases(struct ceph_mds_client *mdsc,
541541
extern void ceph_queue_cap_reclaim_work(struct ceph_mds_client *mdsc);
542542
extern void ceph_reclaim_caps_nr(struct ceph_mds_client *mdsc, int nr);
543543
extern int ceph_iterate_session_caps(struct ceph_mds_session *session,
544-
int (*cb)(struct inode *,
545-
struct ceph_cap *, void *),
544+
int (*cb)(struct inode *, int mds, void *),
546545
void *arg);
547546
extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc);
548547

fs/ceph/super.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1192,6 +1192,8 @@ extern void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
11921192
struct ceph_mds_session *session);
11931193
void ceph_kick_flushing_inode_caps(struct ceph_mds_session *session,
11941194
struct ceph_inode_info *ci);
1195+
extern struct ceph_cap *__get_cap_for_mds(struct ceph_inode_info *ci,
1196+
int mds);
11951197
extern struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci,
11961198
int mds);
11971199
extern void ceph_take_cap_refs(struct ceph_inode_info *ci, int caps,

fs/ceph/xattr.c

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -535,6 +535,8 @@ static struct ceph_vxattr *ceph_match_vxattr(struct inode *inode,
535535
return NULL;
536536
}
537537

538+
#define MAX_XATTR_VAL_PRINT_LEN 256
539+
538540
static int __set_xattr(struct ceph_inode_info *ci,
539541
const char *name, int name_len,
540542
const char *val, int val_len,
@@ -597,7 +599,7 @@ static int __set_xattr(struct ceph_inode_info *ci,
597599
xattr->should_free_name = update_xattr;
598600

599601
ci->i_xattrs.count++;
600-
dout("__set_xattr count=%d\n", ci->i_xattrs.count);
602+
dout("%s count=%d\n", __func__, ci->i_xattrs.count);
601603
} else {
602604
kfree(*newxattr);
603605
*newxattr = NULL;
@@ -625,11 +627,13 @@ static int __set_xattr(struct ceph_inode_info *ci,
625627
if (new) {
626628
rb_link_node(&xattr->node, parent, p);
627629
rb_insert_color(&xattr->node, &ci->i_xattrs.index);
628-
dout("__set_xattr_val p=%p\n", p);
630+
dout("%s p=%p\n", __func__, p);
629631
}
630632

631-
dout("__set_xattr_val added %llx.%llx xattr %p %.*s=%.*s\n",
632-
ceph_vinop(&ci->netfs.inode), xattr, name_len, name, val_len, val);
633+
dout("%s added %llx.%llx xattr %p %.*s=%.*s%s\n", __func__,
634+
ceph_vinop(&ci->netfs.inode), xattr, name_len, name,
635+
min(val_len, MAX_XATTR_VAL_PRINT_LEN), val,
636+
val_len > MAX_XATTR_VAL_PRINT_LEN ? "..." : "");
633637

634638
return 0;
635639
}
@@ -655,13 +659,15 @@ static struct ceph_inode_xattr *__get_xattr(struct ceph_inode_info *ci,
655659
else if (c > 0)
656660
p = &(*p)->rb_right;
657661
else {
658-
dout("__get_xattr %s: found %.*s\n", name,
659-
xattr->val_len, xattr->val);
662+
int len = min(xattr->val_len, MAX_XATTR_VAL_PRINT_LEN);
663+
664+
dout("%s %s: found %.*s%s\n", __func__, name, len,
665+
xattr->val, xattr->val_len > len ? "..." : "");
660666
return xattr;
661667
}
662668
}
663669

664-
dout("__get_xattr %s: not found\n", name);
670+
dout("%s %s: not found\n", __func__, name);
665671

666672
return NULL;
667673
}

0 commit comments

Comments
 (0)