4444static struct percpu_counter mptcp_sockets_allocated ____cacheline_aligned_in_smp ;
4545
4646static void __mptcp_destroy_sock (struct sock * sk );
47- static void __mptcp_check_send_data_fin (struct sock * sk );
47+ static void mptcp_check_send_data_fin (struct sock * sk );
4848
4949DEFINE_PER_CPU (struct mptcp_delegated_action , mptcp_delegated_actions );
5050static struct net_device mptcp_napi_dev ;
@@ -424,8 +424,7 @@ static bool mptcp_pending_data_fin_ack(struct sock *sk)
424424{
425425 struct mptcp_sock * msk = mptcp_sk (sk );
426426
427- return !__mptcp_check_fallback (msk ) &&
428- ((1 << sk -> sk_state ) &
427+ return ((1 << sk -> sk_state ) &
429428 (TCPF_FIN_WAIT1 | TCPF_CLOSING | TCPF_LAST_ACK )) &&
430429 msk -> write_seq == READ_ONCE (msk -> snd_una );
431430}
@@ -583,9 +582,6 @@ static bool mptcp_check_data_fin(struct sock *sk)
583582 u64 rcv_data_fin_seq ;
584583 bool ret = false;
585584
586- if (__mptcp_check_fallback (msk ))
587- return ret ;
588-
589585 /* Need to ack a DATA_FIN received from a peer while this side
590586 * of the connection is in ESTABLISHED, FIN_WAIT1, or FIN_WAIT2.
591587 * msk->rcv_data_fin was set when parsing the incoming options
@@ -623,7 +619,8 @@ static bool mptcp_check_data_fin(struct sock *sk)
623619 }
624620
625621 ret = true;
626- mptcp_send_ack (msk );
622+ if (!__mptcp_check_fallback (msk ))
623+ mptcp_send_ack (msk );
627624 mptcp_close_wake_up (sk );
628625 }
629626 return ret ;
@@ -850,12 +847,12 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
850847 return true;
851848}
852849
853- static void __mptcp_flush_join_list (struct sock * sk )
850+ static void __mptcp_flush_join_list (struct sock * sk , struct list_head * join_list )
854851{
855852 struct mptcp_subflow_context * tmp , * subflow ;
856853 struct mptcp_sock * msk = mptcp_sk (sk );
857854
858- list_for_each_entry_safe (subflow , tmp , & msk -> join_list , node ) {
855+ list_for_each_entry_safe (subflow , tmp , join_list , node ) {
859856 struct sock * ssk = mptcp_subflow_tcp_sock (subflow );
860857 bool slow = lock_sock_fast (ssk );
861858
@@ -897,49 +894,6 @@ bool mptcp_schedule_work(struct sock *sk)
897894 return false;
898895}
899896
900- void mptcp_subflow_eof (struct sock * sk )
901- {
902- if (!test_and_set_bit (MPTCP_WORK_EOF , & mptcp_sk (sk )-> flags ))
903- mptcp_schedule_work (sk );
904- }
905-
906- static void mptcp_check_for_eof (struct mptcp_sock * msk )
907- {
908- struct mptcp_subflow_context * subflow ;
909- struct sock * sk = (struct sock * )msk ;
910- int receivers = 0 ;
911-
912- mptcp_for_each_subflow (msk , subflow )
913- receivers += !subflow -> rx_eof ;
914- if (receivers )
915- return ;
916-
917- if (!(sk -> sk_shutdown & RCV_SHUTDOWN )) {
918- /* hopefully temporary hack: propagate shutdown status
919- * to msk, when all subflows agree on it
920- */
921- WRITE_ONCE (sk -> sk_shutdown , sk -> sk_shutdown | RCV_SHUTDOWN );
922-
923- smp_mb__before_atomic (); /* SHUTDOWN must be visible first */
924- sk -> sk_data_ready (sk );
925- }
926-
927- switch (sk -> sk_state ) {
928- case TCP_ESTABLISHED :
929- inet_sk_state_store (sk , TCP_CLOSE_WAIT );
930- break ;
931- case TCP_FIN_WAIT1 :
932- inet_sk_state_store (sk , TCP_CLOSING );
933- break ;
934- case TCP_FIN_WAIT2 :
935- inet_sk_state_store (sk , TCP_CLOSE );
936- break ;
937- default :
938- return ;
939- }
940- mptcp_close_wake_up (sk );
941- }
942-
943897static struct sock * mptcp_subflow_recv_lookup (const struct mptcp_sock * msk )
944898{
945899 struct mptcp_subflow_context * subflow ;
@@ -1609,7 +1563,7 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
16091563 if (!mptcp_timer_pending (sk ))
16101564 mptcp_reset_timer (sk );
16111565 if (do_check_data_fin )
1612- __mptcp_check_send_data_fin (sk );
1566+ mptcp_check_send_data_fin (sk );
16131567}
16141568
16151569static void __mptcp_subflow_push_pending (struct sock * sk , struct sock * ssk , bool first )
@@ -1727,7 +1681,13 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
17271681 if (ret && ret != - EINPROGRESS && ret != - ERESTARTSYS && ret != - EINTR )
17281682 * copied_syn = 0 ;
17291683 } else if (ret && ret != - EINPROGRESS ) {
1730- mptcp_disconnect (sk , 0 );
1684+ /* The disconnect() op called by tcp_sendmsg_fastopen()/
 1685+ * __inet_stream_connect() can fail, due to locking check,
1686+ * see mptcp_disconnect().
1687+ * Attempt it again outside the problematic scope.
1688+ */
1689+ if (!mptcp_disconnect (sk , 0 ))
1690+ sk -> sk_socket -> state = SS_UNCONNECTED ;
17311691 }
17321692 inet_sk (sk )-> defer_connect = 0 ;
17331693
@@ -2158,9 +2118,6 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
21582118 break ;
21592119 }
21602120
2161- if (test_and_clear_bit (MPTCP_WORK_EOF , & msk -> flags ))
2162- mptcp_check_for_eof (msk );
2163-
21642121 if (sk -> sk_shutdown & RCV_SHUTDOWN ) {
21652122 /* race breaker: the shutdown could be after the
21662123 * previous receive queue check
@@ -2389,7 +2346,10 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
23892346
23902347 need_push = (flags & MPTCP_CF_PUSH ) && __mptcp_retransmit_pending_data (sk );
23912348 if (!dispose_it ) {
2392- tcp_disconnect (ssk , 0 );
 2349+ /* The MPTCP code never waits on the subflow sockets, TCP-level
2350+ * disconnect should never fail
2351+ */
2352+ WARN_ON_ONCE (tcp_disconnect (ssk , 0 ));
23932353 msk -> subflow -> state = SS_UNCONNECTED ;
23942354 mptcp_subflow_ctx_reset (subflow );
23952355 release_sock (ssk );
@@ -2408,13 +2368,6 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
24082368 kfree_rcu (subflow , rcu );
24092369 } else {
24102370 /* otherwise tcp will dispose of the ssk and subflow ctx */
2411- if (ssk -> sk_state == TCP_LISTEN ) {
2412- tcp_set_state (ssk , TCP_CLOSE );
2413- mptcp_subflow_queue_clean (sk , ssk );
2414- inet_csk_listen_stop (ssk );
2415- mptcp_event_pm_listener (ssk , MPTCP_EVENT_LISTENER_CLOSED );
2416- }
2417-
24182371 __tcp_close (ssk , 0 );
24192372
24202373 /* close acquired an extra ref */
@@ -2671,16 +2624,12 @@ static void mptcp_worker(struct work_struct *work)
26712624 if (unlikely ((1 << state ) & (TCPF_CLOSE | TCPF_LISTEN )))
26722625 goto unlock ;
26732626
2674- mptcp_check_data_fin_ack (sk );
2675-
26762627 mptcp_check_fastclose (msk );
26772628
26782629 mptcp_pm_nl_work (msk );
26792630
2680- if (test_and_clear_bit (MPTCP_WORK_EOF , & msk -> flags ))
2681- mptcp_check_for_eof (msk );
2682-
2683- __mptcp_check_send_data_fin (sk );
2631+ mptcp_check_send_data_fin (sk );
2632+ mptcp_check_data_fin_ack (sk );
26842633 mptcp_check_data_fin (sk );
26852634
26862635 if (test_and_clear_bit (MPTCP_WORK_CLOSE_SUBFLOW , & msk -> flags ))
@@ -2812,13 +2761,19 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how)
28122761 break ;
28132762 fallthrough ;
28142763 case TCP_SYN_SENT :
2815- tcp_disconnect (ssk , O_NONBLOCK );
2764+ WARN_ON_ONCE ( tcp_disconnect (ssk , O_NONBLOCK ) );
28162765 break ;
28172766 default :
28182767 if (__mptcp_check_fallback (mptcp_sk (sk ))) {
28192768 pr_debug ("Fallback" );
28202769 ssk -> sk_shutdown |= how ;
28212770 tcp_shutdown (ssk , how );
2771+
2772+ /* simulate the data_fin ack reception to let the state
2773+ * machine move forward
2774+ */
2775+ WRITE_ONCE (mptcp_sk (sk )-> snd_una , mptcp_sk (sk )-> snd_nxt );
2776+ mptcp_schedule_work (sk );
28222777 } else {
28232778 pr_debug ("Sending DATA_FIN on subflow %p" , ssk );
28242779 tcp_send_ack (ssk );
@@ -2858,7 +2813,7 @@ static int mptcp_close_state(struct sock *sk)
28582813 return next & TCP_ACTION_FIN ;
28592814}
28602815
2861- static void __mptcp_check_send_data_fin (struct sock * sk )
2816+ static void mptcp_check_send_data_fin (struct sock * sk )
28622817{
28632818 struct mptcp_subflow_context * subflow ;
28642819 struct mptcp_sock * msk = mptcp_sk (sk );
@@ -2876,19 +2831,6 @@ static void __mptcp_check_send_data_fin(struct sock *sk)
28762831
28772832 WRITE_ONCE (msk -> snd_nxt , msk -> write_seq );
28782833
2879- /* fallback socket will not get data_fin/ack, can move to the next
2880- * state now
2881- */
2882- if (__mptcp_check_fallback (msk )) {
2883- WRITE_ONCE (msk -> snd_una , msk -> write_seq );
2884- if ((1 << sk -> sk_state ) & (TCPF_CLOSING | TCPF_LAST_ACK )) {
2885- inet_sk_state_store (sk , TCP_CLOSE );
2886- mptcp_close_wake_up (sk );
2887- } else if (sk -> sk_state == TCP_FIN_WAIT1 ) {
2888- inet_sk_state_store (sk , TCP_FIN_WAIT2 );
2889- }
2890- }
2891-
28922834 mptcp_for_each_subflow (msk , subflow ) {
28932835 struct sock * tcp_sk = mptcp_subflow_tcp_sock (subflow );
28942836
@@ -2908,7 +2850,7 @@ static void __mptcp_wr_shutdown(struct sock *sk)
29082850 WRITE_ONCE (msk -> write_seq , msk -> write_seq + 1 );
29092851 WRITE_ONCE (msk -> snd_data_fin_enable , 1 );
29102852
2911- __mptcp_check_send_data_fin (sk );
2853+ mptcp_check_send_data_fin (sk );
29122854}
29132855
29142856static void __mptcp_destroy_sock (struct sock * sk )
@@ -2953,10 +2895,24 @@ static __poll_t mptcp_check_readable(struct mptcp_sock *msk)
29532895 return EPOLLIN | EPOLLRDNORM ;
29542896}
29552897
2956- static void mptcp_listen_inuse_dec (struct sock * sk )
2898+ static void mptcp_check_listen_stop (struct sock * sk )
29572899{
2958- if (inet_sk_state_load (sk ) == TCP_LISTEN )
2959- sock_prot_inuse_add (sock_net (sk ), sk -> sk_prot , -1 );
2900+ struct sock * ssk ;
2901+
2902+ if (inet_sk_state_load (sk ) != TCP_LISTEN )
2903+ return ;
2904+
2905+ sock_prot_inuse_add (sock_net (sk ), sk -> sk_prot , -1 );
2906+ ssk = mptcp_sk (sk )-> first ;
2907+ if (WARN_ON_ONCE (!ssk || inet_sk_state_load (ssk ) != TCP_LISTEN ))
2908+ return ;
2909+
2910+ lock_sock_nested (ssk , SINGLE_DEPTH_NESTING );
2911+ mptcp_subflow_queue_clean (sk , ssk );
2912+ inet_csk_listen_stop (ssk );
2913+ mptcp_event_pm_listener (ssk , MPTCP_EVENT_LISTENER_CLOSED );
2914+ tcp_set_state (ssk , TCP_CLOSE );
2915+ release_sock (ssk );
29602916}
29612917
29622918bool __mptcp_close (struct sock * sk , long timeout )
@@ -2969,7 +2925,7 @@ bool __mptcp_close(struct sock *sk, long timeout)
29692925 WRITE_ONCE (sk -> sk_shutdown , SHUTDOWN_MASK );
29702926
29712927 if ((1 << sk -> sk_state ) & (TCPF_LISTEN | TCPF_CLOSE )) {
2972- mptcp_listen_inuse_dec (sk );
2928+ mptcp_check_listen_stop (sk );
29732929 inet_sk_state_store (sk , TCP_CLOSE );
29742930 goto cleanup ;
29752931 }
@@ -3073,15 +3029,20 @@ static int mptcp_disconnect(struct sock *sk, int flags)
30733029{
30743030 struct mptcp_sock * msk = mptcp_sk (sk );
30753031
3032+ /* Deny disconnect if other threads are blocked in sk_wait_event()
3033+ * or inet_wait_for_connect().
3034+ */
3035+ if (sk -> sk_wait_pending )
3036+ return - EBUSY ;
3037+
30763038 /* We are on the fastopen error path. We can't call straight into the
30773039 * subflows cleanup code due to lock nesting (we are already under
3078- * msk->firstsocket lock). Do nothing and leave the cleanup to the
3079- * caller.
3040+ * msk->firstsocket lock).
30803041 */
30813042 if (msk -> fastopening )
3082- return 0 ;
3043+ return - EBUSY ;
30833044
3084- mptcp_listen_inuse_dec (sk );
3045+ mptcp_check_listen_stop (sk );
30853046 inet_sk_state_store (sk , TCP_CLOSE );
30863047
30873048 mptcp_stop_timer (sk );
@@ -3140,6 +3101,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
31403101 inet_sk (nsk )-> pinet6 = mptcp_inet6_sk (nsk );
31413102#endif
31423103
3104+ nsk -> sk_wait_pending = 0 ;
31433105 __mptcp_init_sock (nsk );
31443106
31453107 msk = mptcp_sk (nsk );
@@ -3327,9 +3289,14 @@ static void mptcp_release_cb(struct sock *sk)
33273289 for (;;) {
33283290 unsigned long flags = (msk -> cb_flags & MPTCP_FLAGS_PROCESS_CTX_NEED ) |
33293291 msk -> push_pending ;
3292+ struct list_head join_list ;
3293+
33303294 if (!flags )
33313295 break ;
33323296
3297+ INIT_LIST_HEAD (& join_list );
3298+ list_splice_init (& msk -> join_list , & join_list );
3299+
33333300 /* the following actions acquire the subflow socket lock
33343301 *
33353302 * 1) can't be invoked in atomic scope
@@ -3340,8 +3307,9 @@ static void mptcp_release_cb(struct sock *sk)
33403307 msk -> push_pending = 0 ;
33413308 msk -> cb_flags &= ~flags ;
33423309 spin_unlock_bh (& sk -> sk_lock .slock );
3310+
33433311 if (flags & BIT (MPTCP_FLUSH_JOIN_LIST ))
3344- __mptcp_flush_join_list (sk );
3312+ __mptcp_flush_join_list (sk , & join_list );
33453313 if (flags & BIT (MPTCP_PUSH_PENDING ))
33463314 __mptcp_push_pending (sk , 0 );
33473315 if (flags & BIT (MPTCP_RETRANSMIT ))