@@ -90,8 +90,8 @@ static int __mptcp_socket_create(struct mptcp_sock *msk)
9090 if (err )
9191 return err ;
9292
93- msk -> first = ssock -> sk ;
94- msk -> subflow = ssock ;
93+ WRITE_ONCE ( msk -> first , ssock -> sk ) ;
94+ WRITE_ONCE ( msk -> subflow , ssock ) ;
9595 subflow = mptcp_subflow_ctx (ssock -> sk );
9696 list_add (& subflow -> node , & msk -> conn_list );
9797 sock_hold (ssock -> sk );
@@ -603,7 +603,7 @@ static bool mptcp_check_data_fin(struct sock *sk)
603603 WRITE_ONCE (msk -> ack_seq , msk -> ack_seq + 1 );
604604 WRITE_ONCE (msk -> rcv_data_fin , 0 );
605605
606- sk -> sk_shutdown |= RCV_SHUTDOWN ;
606+ WRITE_ONCE ( sk -> sk_shutdown , sk -> sk_shutdown | RCV_SHUTDOWN ) ;
607607 smp_mb__before_atomic (); /* SHUTDOWN must be visible first */
608608
609609 switch (sk -> sk_state ) {
@@ -825,6 +825,13 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
825825 mptcp_data_unlock (sk );
826826}
827827
828+ static void mptcp_subflow_joined (struct mptcp_sock * msk , struct sock * ssk )
829+ {
830+ mptcp_subflow_ctx (ssk )-> map_seq = READ_ONCE (msk -> ack_seq );
831+ WRITE_ONCE (msk -> allow_infinite_fallback , false);
832+ mptcp_event (MPTCP_EVENT_SUB_ESTABLISHED , msk , ssk , GFP_ATOMIC );
833+ }
834+
828835static bool __mptcp_finish_join (struct mptcp_sock * msk , struct sock * ssk )
829836{
830837 struct sock * sk = (struct sock * )msk ;
@@ -839,6 +846,7 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
839846 mptcp_sock_graft (ssk , sk -> sk_socket );
840847
841848 mptcp_sockopt_sync_locked (msk , ssk );
849+ mptcp_subflow_joined (msk , ssk );
842850 return true;
843851}
844852
@@ -910,7 +918,7 @@ static void mptcp_check_for_eof(struct mptcp_sock *msk)
910918 /* hopefully temporary hack: propagate shutdown status
911919 * to msk, when all subflows agree on it
912920 */
913- sk -> sk_shutdown |= RCV_SHUTDOWN ;
921+ WRITE_ONCE ( sk -> sk_shutdown , sk -> sk_shutdown | RCV_SHUTDOWN ) ;
914922
915923 smp_mb__before_atomic (); /* SHUTDOWN must be visible first */
916924 sk -> sk_data_ready (sk );
@@ -1702,7 +1710,6 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
17021710
17031711 lock_sock (ssk );
17041712 msg -> msg_flags |= MSG_DONTWAIT ;
1705- msk -> connect_flags = O_NONBLOCK ;
17061713 msk -> fastopening = 1 ;
17071714 ret = tcp_sendmsg_fastopen (ssk , msg , copied_syn , len , NULL );
17081715 msk -> fastopening = 0 ;
@@ -2283,7 +2290,7 @@ static void mptcp_dispose_initial_subflow(struct mptcp_sock *msk)
22832290{
22842291 if (msk -> subflow ) {
22852292 iput (SOCK_INODE (msk -> subflow ));
2286- msk -> subflow = NULL ;
2293+ WRITE_ONCE ( msk -> subflow , NULL ) ;
22872294 }
22882295}
22892296
@@ -2420,7 +2427,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
24202427 sock_put (ssk );
24212428
24222429 if (ssk == msk -> first )
2423- msk -> first = NULL ;
2430+ WRITE_ONCE ( msk -> first , NULL ) ;
24242431
24252432out :
24262433 if (ssk == msk -> last_snd )
@@ -2527,7 +2534,7 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk)
25272534 }
25282535
25292536 inet_sk_state_store (sk , TCP_CLOSE );
2530- sk -> sk_shutdown = SHUTDOWN_MASK ;
2537+ WRITE_ONCE ( sk -> sk_shutdown , SHUTDOWN_MASK ) ;
25312538 smp_mb__before_atomic (); /* SHUTDOWN must be visible first */
25322539 set_bit (MPTCP_WORK_CLOSE_SUBFLOW , & msk -> flags );
25332540
@@ -2721,7 +2728,7 @@ static int __mptcp_init_sock(struct sock *sk)
27212728 WRITE_ONCE (msk -> rmem_released , 0 );
27222729 msk -> timer_ival = TCP_RTO_MIN ;
27232730
2724- msk -> first = NULL ;
2731+ WRITE_ONCE ( msk -> first , NULL ) ;
27252732 inet_csk (sk )-> icsk_sync_mss = mptcp_sync_mss ;
27262733 WRITE_ONCE (msk -> csum_enabled , mptcp_is_checksum_enabled (sock_net (sk )));
27272734 WRITE_ONCE (msk -> allow_infinite_fallback , true);
@@ -2959,7 +2966,7 @@ bool __mptcp_close(struct sock *sk, long timeout)
29592966 bool do_cancel_work = false;
29602967 int subflows_alive = 0 ;
29612968
2962- sk -> sk_shutdown = SHUTDOWN_MASK ;
2969+ WRITE_ONCE ( sk -> sk_shutdown , SHUTDOWN_MASK ) ;
29632970
29642971 if ((1 << sk -> sk_state ) & (TCPF_LISTEN | TCPF_CLOSE )) {
29652972 mptcp_listen_inuse_dec (sk );
@@ -3039,7 +3046,7 @@ static void mptcp_close(struct sock *sk, long timeout)
30393046 sock_put (sk );
30403047}
30413048
3042- void mptcp_copy_inaddrs (struct sock * msk , const struct sock * ssk )
3049+ static void mptcp_copy_inaddrs (struct sock * msk , const struct sock * ssk )
30433050{
30443051#if IS_ENABLED (CONFIG_MPTCP_IPV6 )
30453052 const struct ipv6_pinfo * ssk6 = inet6_sk (ssk );
@@ -3102,7 +3109,7 @@ static int mptcp_disconnect(struct sock *sk, int flags)
31023109 mptcp_pm_data_reset (msk );
31033110 mptcp_ca_reset (sk );
31043111
3105- sk -> sk_shutdown = 0 ;
3112+ WRITE_ONCE ( sk -> sk_shutdown , 0 ) ;
31063113 sk_error_report (sk );
31073114 return 0 ;
31083115}
@@ -3116,9 +3123,10 @@ static struct ipv6_pinfo *mptcp_inet6_sk(const struct sock *sk)
31163123}
31173124#endif
31183125
3119- struct sock * mptcp_sk_clone (const struct sock * sk ,
3120- const struct mptcp_options_received * mp_opt ,
3121- struct request_sock * req )
3126+ struct sock * mptcp_sk_clone_init (const struct sock * sk ,
3127+ const struct mptcp_options_received * mp_opt ,
3128+ struct sock * ssk ,
3129+ struct request_sock * req )
31223130{
31233131 struct mptcp_subflow_request_sock * subflow_req = mptcp_subflow_rsk (req );
31243132 struct sock * nsk = sk_clone_lock (sk , GFP_ATOMIC );
@@ -3137,7 +3145,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
31373145 msk = mptcp_sk (nsk );
31383146 msk -> local_key = subflow_req -> local_key ;
31393147 msk -> token = subflow_req -> token ;
3140- msk -> subflow = NULL ;
3148+ WRITE_ONCE ( msk -> subflow , NULL ) ;
31413149 msk -> in_accept_queue = 1 ;
31423150 WRITE_ONCE (msk -> fully_established , false);
31433151 if (mp_opt -> suboptions & OPTION_MPTCP_CSUMREQD )
@@ -3150,10 +3158,30 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
31503158 msk -> setsockopt_seq = mptcp_sk (sk )-> setsockopt_seq ;
31513159
31523160 sock_reset_flag (nsk , SOCK_RCU_FREE );
3153- /* will be fully established after successful MPC subflow creation */
3154- inet_sk_state_store (nsk , TCP_SYN_RECV );
3155-
31563161 security_inet_csk_clone (nsk , req );
3162+
3163+ /* this can't race with mptcp_close(), as the msk is
3164+ * not yet exposted to user-space
3165+ */
3166+ inet_sk_state_store (nsk , TCP_ESTABLISHED );
3167+
3168+ /* The msk maintain a ref to each subflow in the connections list */
3169+ WRITE_ONCE (msk -> first , ssk );
3170+ list_add (& mptcp_subflow_ctx (ssk )-> node , & msk -> conn_list );
3171+ sock_hold (ssk );
3172+
3173+ /* new mpc subflow takes ownership of the newly
3174+ * created mptcp socket
3175+ */
3176+ mptcp_token_accept (subflow_req , msk );
3177+
3178+ /* set msk addresses early to ensure mptcp_pm_get_local_id()
3179+ * uses the correct data
3180+ */
3181+ mptcp_copy_inaddrs (nsk , ssk );
3182+ mptcp_propagate_sndbuf (nsk , ssk );
3183+
3184+ mptcp_rcv_space_init (msk , ssk );
31573185 bh_unlock_sock (nsk );
31583186
31593187 /* note: the newly allocated socket refcount is 2 now */
@@ -3185,7 +3213,7 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
31853213 struct socket * listener ;
31863214 struct sock * newsk ;
31873215
3188- listener = msk -> subflow ;
3216+ listener = READ_ONCE ( msk -> subflow ) ;
31893217 if (WARN_ON_ONCE (!listener )) {
31903218 * err = - EINVAL ;
31913219 return NULL ;
@@ -3465,14 +3493,16 @@ bool mptcp_finish_join(struct sock *ssk)
34653493 return false;
34663494 }
34673495
3468- if (!list_empty (& subflow -> node ))
3469- goto out ;
3496+ /* active subflow, already present inside the conn_list */
3497+ if (!list_empty (& subflow -> node )) {
3498+ mptcp_subflow_joined (msk , ssk );
3499+ return true;
3500+ }
34703501
34713502 if (!mptcp_pm_allow_new_subflow (msk ))
34723503 goto err_prohibited ;
34733504
3474- /* active connections are already on conn_list.
3475- * If we can't acquire msk socket lock here, let the release callback
3505+ /* If we can't acquire msk socket lock here, let the release callback
34763506 * handle it
34773507 */
34783508 mptcp_data_lock (parent );
@@ -3495,11 +3525,6 @@ bool mptcp_finish_join(struct sock *ssk)
34953525 return false;
34963526 }
34973527
3498- subflow -> map_seq = READ_ONCE (msk -> ack_seq );
3499- WRITE_ONCE (msk -> allow_infinite_fallback , false);
3500-
3501- out :
3502- mptcp_event (MPTCP_EVENT_SUB_ESTABLISHED , msk , ssk , GFP_ATOMIC );
35033528 return true;
35043529}
35053530
@@ -3617,9 +3642,9 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
36173642 * acquired the subflow socket lock, too.
36183643 */
36193644 if (msk -> fastopening )
3620- err = __inet_stream_connect (ssock , uaddr , addr_len , msk -> connect_flags , 1 );
3645+ err = __inet_stream_connect (ssock , uaddr , addr_len , O_NONBLOCK , 1 );
36213646 else
3622- err = inet_stream_connect (ssock , uaddr , addr_len , msk -> connect_flags );
3647+ err = inet_stream_connect (ssock , uaddr , addr_len , O_NONBLOCK );
36233648 inet_sk (sk )-> defer_connect = inet_sk (ssock -> sk )-> defer_connect ;
36243649
36253650 /* on successful connect, the msk state will be moved to established by
@@ -3632,12 +3657,10 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
36323657
36333658 mptcp_copy_inaddrs (sk , ssock -> sk );
36343659
3635- /* unblocking connect, mptcp-level inet_stream_connect will error out
3636- * without changing the socket state, update it here.
3660+ /* silence EINPROGRESS and let the caller inet_stream_connect
3661+ * handle the connection in progress
36373662 */
3638- if (err == - EINPROGRESS )
3639- sk -> sk_socket -> state = ssock -> state ;
3640- return err ;
3663+ return 0 ;
36413664}
36423665
36433666static struct proto mptcp_prot = {
@@ -3696,18 +3719,6 @@ static int mptcp_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
36963719 return err ;
36973720}
36983721
3699- static int mptcp_stream_connect (struct socket * sock , struct sockaddr * uaddr ,
3700- int addr_len , int flags )
3701- {
3702- int ret ;
3703-
3704- lock_sock (sock -> sk );
3705- mptcp_sk (sock -> sk )-> connect_flags = flags ;
3706- ret = __inet_stream_connect (sock , uaddr , addr_len , flags , 0 );
3707- release_sock (sock -> sk );
3708- return ret ;
3709- }
3710-
37113722static int mptcp_listen (struct socket * sock , int backlog )
37123723{
37133724 struct mptcp_sock * msk = mptcp_sk (sock -> sk );
@@ -3751,10 +3762,10 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
37513762
37523763 pr_debug ("msk=%p" , msk );
37533764
3754- /* buggy applications can call accept on socket states other then LISTEN
3765+ /* Buggy applications can call accept on socket states other then LISTEN
37553766 * but no need to allocate the first subflow just to error out.
37563767 */
3757- ssock = msk -> subflow ;
3768+ ssock = READ_ONCE ( msk -> subflow ) ;
37583769 if (!ssock )
37593770 return - EINVAL ;
37603771
@@ -3800,9 +3811,6 @@ static __poll_t mptcp_check_writeable(struct mptcp_sock *msk)
38003811{
38013812 struct sock * sk = (struct sock * )msk ;
38023813
3803- if (unlikely (sk -> sk_shutdown & SEND_SHUTDOWN ))
3804- return EPOLLOUT | EPOLLWRNORM ;
3805-
38063814 if (sk_stream_is_writeable (sk ))
38073815 return EPOLLOUT | EPOLLWRNORM ;
38083816
@@ -3820,6 +3828,7 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
38203828 struct sock * sk = sock -> sk ;
38213829 struct mptcp_sock * msk ;
38223830 __poll_t mask = 0 ;
3831+ u8 shutdown ;
38233832 int state ;
38243833
38253834 msk = mptcp_sk (sk );
@@ -3828,23 +3837,30 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
38283837 state = inet_sk_state_load (sk );
38293838 pr_debug ("msk=%p state=%d flags=%lx" , msk , state , msk -> flags );
38303839 if (state == TCP_LISTEN ) {
3831- if (WARN_ON_ONCE (!msk -> subflow || !msk -> subflow -> sk ))
3840+ struct socket * ssock = READ_ONCE (msk -> subflow );
3841+
3842+ if (WARN_ON_ONCE (!ssock || !ssock -> sk ))
38323843 return 0 ;
38333844
3834- return inet_csk_listen_poll (msk -> subflow -> sk );
3845+ return inet_csk_listen_poll (ssock -> sk );
38353846 }
38363847
3848+ shutdown = READ_ONCE (sk -> sk_shutdown );
3849+ if (shutdown == SHUTDOWN_MASK || state == TCP_CLOSE )
3850+ mask |= EPOLLHUP ;
3851+ if (shutdown & RCV_SHUTDOWN )
3852+ mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP ;
3853+
38373854 if (state != TCP_SYN_SENT && state != TCP_SYN_RECV ) {
38383855 mask |= mptcp_check_readable (msk );
3839- mask |= mptcp_check_writeable (msk );
3856+ if (shutdown & SEND_SHUTDOWN )
3857+ mask |= EPOLLOUT | EPOLLWRNORM ;
3858+ else
3859+ mask |= mptcp_check_writeable (msk );
38403860 } else if (state == TCP_SYN_SENT && inet_sk (sk )-> defer_connect ) {
38413861 /* cf tcp_poll() note about TFO */
38423862 mask |= EPOLLOUT | EPOLLWRNORM ;
38433863 }
3844- if (sk -> sk_shutdown == SHUTDOWN_MASK || state == TCP_CLOSE )
3845- mask |= EPOLLHUP ;
3846- if (sk -> sk_shutdown & RCV_SHUTDOWN )
3847- mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP ;
38483864
38493865 /* This barrier is coupled with smp_wmb() in __mptcp_error_report() */
38503866 smp_rmb ();
@@ -3859,7 +3875,7 @@ static const struct proto_ops mptcp_stream_ops = {
38593875 .owner = THIS_MODULE ,
38603876 .release = inet_release ,
38613877 .bind = mptcp_bind ,
3862- .connect = mptcp_stream_connect ,
3878+ .connect = inet_stream_connect ,
38633879 .socketpair = sock_no_socketpair ,
38643880 .accept = mptcp_stream_accept ,
38653881 .getname = inet_getname ,
@@ -3954,7 +3970,7 @@ static const struct proto_ops mptcp_v6_stream_ops = {
39543970 .owner = THIS_MODULE ,
39553971 .release = inet6_release ,
39563972 .bind = mptcp_bind ,
3957- .connect = mptcp_stream_connect ,
3973+ .connect = inet_stream_connect ,
39583974 .socketpair = sock_no_socketpair ,
39593975 .accept = mptcp_stream_accept ,
39603976 .getname = inet6_getname ,
0 commit comments