@@ -921,13 +921,26 @@ int io_import_fixed(int ddir, struct iov_iter *iter,
921921 return 0 ;
922922}
923923
924+ /* Lock two rings at once. The rings must be different! */
925+ static void lock_two_rings (struct io_ring_ctx * ctx1 , struct io_ring_ctx * ctx2 )
926+ {
927+ if (ctx1 > ctx2 )
928+ swap (ctx1 , ctx2 );
929+ mutex_lock (& ctx1 -> uring_lock );
930+ mutex_lock_nested (& ctx2 -> uring_lock , SINGLE_DEPTH_NESTING );
931+ }
932+
933+ /* Both rings are locked by the caller. */
924934static int io_clone_buffers (struct io_ring_ctx * ctx , struct io_ring_ctx * src_ctx ,
925935 struct io_uring_clone_buffers * arg )
926936{
927937 struct io_rsrc_data data ;
928938 int i , ret , off , nr ;
929939 unsigned int nbufs ;
930940
941+ lockdep_assert_held (& ctx -> uring_lock );
942+ lockdep_assert_held (& src_ctx -> uring_lock );
943+
931944 /*
932945 * Accounting state is shared between the two rings; that only works if
933946 * both rings are accounted towards the same counters.
@@ -942,7 +955,7 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
942955 if (ctx -> buf_table .nr && !(arg -> flags & IORING_REGISTER_DST_REPLACE ))
943956 return - EBUSY ;
944957
945- nbufs = READ_ONCE ( src_ctx -> buf_table .nr ) ;
958+ nbufs = src_ctx -> buf_table .nr ;
946959 if (!arg -> nr )
947960 arg -> nr = nbufs ;
948961 else if (arg -> nr > nbufs )
@@ -966,27 +979,20 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
966979 }
967980 }
968981
969- /*
970- * Drop our own lock here. We'll setup the data we need and reference
971- * the source buffers, then re-grab, check, and assign at the end.
972- */
973- mutex_unlock (& ctx -> uring_lock );
974-
975- mutex_lock (& src_ctx -> uring_lock );
976982 ret = - ENXIO ;
977983 nbufs = src_ctx -> buf_table .nr ;
978984 if (!nbufs )
979- goto out_unlock ;
985+ goto out_free ;
980986 ret = - EINVAL ;
981987 if (!arg -> nr )
982988 arg -> nr = nbufs ;
983989 else if (arg -> nr > nbufs )
984- goto out_unlock ;
990+ goto out_free ;
985991 ret = - EOVERFLOW ;
986992 if (check_add_overflow (arg -> nr , arg -> src_off , & off ))
987- goto out_unlock ;
993+ goto out_free ;
988994 if (off > nbufs )
989- goto out_unlock ;
995+ goto out_free ;
990996
991997 off = arg -> dst_off ;
992998 i = arg -> src_off ;
@@ -1001,7 +1007,7 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
10011007 dst_node = io_rsrc_node_alloc (ctx , IORING_RSRC_BUFFER );
10021008 if (!dst_node ) {
10031009 ret = - ENOMEM ;
1004- goto out_unlock ;
1010+ goto out_free ;
10051011 }
10061012
10071013 refcount_inc (& src_node -> buf -> refs );
@@ -1011,10 +1017,6 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
10111017 i ++ ;
10121018 }
10131019
1014- /* Have a ref on the bufs now, drop src lock and re-grab our own lock */
1015- mutex_unlock (& src_ctx -> uring_lock );
1016- mutex_lock (& ctx -> uring_lock );
1017-
10181020 /*
10191021 * If asked for replace, put the old table. data->nodes[] holds both
10201022 * old and new nodes at this point.
@@ -1023,24 +1025,17 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
10231025 io_rsrc_data_free (ctx , & ctx -> buf_table );
10241026
10251027 /*
1026- * ctx->buf_table should be empty now - either the contents are being
1027- * replaced and we just freed the table, or someone raced setting up
1028- * a buffer table while the clone was happening. If not empty, fall
1029- * through to failure handling .
1028+ * ctx->buf_table must be empty now - either the contents are being
1029+ * replaced and we just freed the table, or the contents are being
1030+ * copied to a ring that does not have buffers yet (checked at function
1031+ * entry) .
10301032 */
1031- if (!ctx -> buf_table .nr ) {
1032- ctx -> buf_table = data ;
1033- return 0 ;
1034- }
1033+ WARN_ON_ONCE (ctx -> buf_table .nr );
1034+ ctx -> buf_table = data ;
1035+ return 0 ;
10351036
1036- mutex_unlock (& ctx -> uring_lock );
1037- mutex_lock (& src_ctx -> uring_lock );
1038- /* someone raced setting up buffers, dump ours */
1039- ret = - EBUSY ;
1040- out_unlock :
1037+ out_free :
10411038 io_rsrc_data_free (ctx , & data );
1042- mutex_unlock (& src_ctx -> uring_lock );
1043- mutex_lock (& ctx -> uring_lock );
10441039 return ret ;
10451040}
10461041
@@ -1054,6 +1049,7 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
10541049int io_register_clone_buffers (struct io_ring_ctx * ctx , void __user * arg )
10551050{
10561051 struct io_uring_clone_buffers buf ;
1052+ struct io_ring_ctx * src_ctx ;
10571053 bool registered_src ;
10581054 struct file * file ;
10591055 int ret ;
@@ -1071,7 +1067,18 @@ int io_register_clone_buffers(struct io_ring_ctx *ctx, void __user *arg)
10711067 file = io_uring_register_get_file (buf .src_fd , registered_src );
10721068 if (IS_ERR (file ))
10731069 return PTR_ERR (file );
1074- ret = io_clone_buffers (ctx , file -> private_data , & buf );
1070+
1071+ src_ctx = file -> private_data ;
1072+ if (src_ctx != ctx ) {
1073+ mutex_unlock (& ctx -> uring_lock );
1074+ lock_two_rings (ctx , src_ctx );
1075+ }
1076+
1077+ ret = io_clone_buffers (ctx , src_ctx , & buf );
1078+
1079+ if (src_ctx != ctx )
1080+ mutex_unlock (& src_ctx -> uring_lock );
1081+
10751082 if (!registered_src )
10761083 fput (file );
10771084 return ret ;
0 commit comments