@@ -405,7 +405,7 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
405405 return false;
406406}
407407
408- static void mptcp_stop_timer (struct sock * sk )
408+ static void mptcp_stop_rtx_timer (struct sock * sk )
409409{
410410 struct inet_connection_sock * icsk = inet_csk (sk );
411411
@@ -770,6 +770,46 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk)
770770 return moved ;
771771}
772772
/* Propagate a pending error from subflow @ssk to the MPTCP socket @sk.
 *
 * Returns true when an error was consumed and reported to @sk, false
 * when there is nothing to propagate.  Caller must hold the msk socket
 * lock.  Note: sock_error() returns a negative errno (and clears
 * ssk->sk_err), hence the sign flip when storing into sk->sk_err below.
 */
static bool __mptcp_subflow_error_report(struct sock *sk, struct sock *ssk)
{
	int err = sock_error(ssk);
	int ssk_state;

	/* nothing pending on this subflow */
	if (!err)
		return false;

	/* only propagate errors on fallen-back sockets or
	 * on MPC connect
	 */
	if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(mptcp_sk(sk)))
		return false;

	/* We need to propagate only transition to CLOSE state.
	 * Orphaned socket will see such state change via
	 * subflow_sched_work_if_closed() and that path will properly
	 * destroy the msk as needed.
	 */
	ssk_state = inet_sk_state_load(ssk);
	if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD))
		inet_sk_state_store(sk, ssk_state);
	/* sk_err conventionally holds a positive errno value */
	WRITE_ONCE(sk->sk_err, -err);

	/* This barrier is coupled with smp_rmb() in mptcp_poll() */
	smp_wmb();
	sk_error_report(sk);
	return true;
}
802+
803+ void __mptcp_error_report (struct sock * sk )
804+ {
805+ struct mptcp_subflow_context * subflow ;
806+ struct mptcp_sock * msk = mptcp_sk (sk );
807+
808+ mptcp_for_each_subflow (msk , subflow )
809+ if (__mptcp_subflow_error_report (sk , mptcp_subflow_tcp_sock (subflow )))
810+ break ;
811+ }
812+
773813/* In most cases we will be able to lock the mptcp socket. If its already
774814 * owned, we need to defer to the work queue to avoid ABBA deadlock.
775815 */
@@ -852,6 +892,7 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
852892 mptcp_subflow_ctx (ssk )-> subflow_id = msk -> subflow_id ++ ;
853893 mptcp_sockopt_sync_locked (msk , ssk );
854894 mptcp_subflow_joined (msk , ssk );
895+ mptcp_stop_tout_timer (sk );
855896 return true;
856897}
857898
@@ -871,12 +912,12 @@ static void __mptcp_flush_join_list(struct sock *sk, struct list_head *join_list
871912 }
872913}
873914
874- static bool mptcp_timer_pending (struct sock * sk )
915+ static bool mptcp_rtx_timer_pending (struct sock * sk )
875916{
876917 return timer_pending (& inet_csk (sk )-> icsk_retransmit_timer );
877918}
878919
879- static void mptcp_reset_timer (struct sock * sk )
920+ static void mptcp_reset_rtx_timer (struct sock * sk )
880921{
881922 struct inet_connection_sock * icsk = inet_csk (sk );
882923 unsigned long tout ;
@@ -1010,10 +1051,10 @@ static void __mptcp_clean_una(struct sock *sk)
10101051out :
10111052 if (snd_una == READ_ONCE (msk -> snd_nxt ) &&
10121053 snd_una == READ_ONCE (msk -> write_seq )) {
1013- if (mptcp_timer_pending (sk ) && !mptcp_data_fin_enabled (msk ))
1014- mptcp_stop_timer (sk );
1054+ if (mptcp_rtx_timer_pending (sk ) && !mptcp_data_fin_enabled (msk ))
1055+ mptcp_stop_rtx_timer (sk );
10151056 } else {
1016- mptcp_reset_timer (sk );
1057+ mptcp_reset_rtx_timer (sk );
10171058 }
10181059}
10191060
@@ -1586,8 +1627,8 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
15861627 mptcp_push_release (ssk , & info );
15871628
15881629 /* ensure the rtx timer is running */
1589- if (!mptcp_timer_pending (sk ))
1590- mptcp_reset_timer (sk );
1630+ if (!mptcp_rtx_timer_pending (sk ))
1631+ mptcp_reset_rtx_timer (sk );
15911632 if (do_check_data_fin )
15921633 mptcp_check_send_data_fin (sk );
15931634}
@@ -1650,8 +1691,8 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk, bool
16501691 if (copied ) {
16511692 tcp_push (ssk , 0 , info .mss_now , tcp_sk (ssk )-> nonagle ,
16521693 info .size_goal );
1653- if (!mptcp_timer_pending (sk ))
1654- mptcp_reset_timer (sk );
1694+ if (!mptcp_rtx_timer_pending (sk ))
1695+ mptcp_reset_rtx_timer (sk );
16551696
16561697 if (msk -> snd_data_fin_enable &&
16571698 msk -> snd_nxt + 1 == msk -> write_seq )
@@ -2220,7 +2261,7 @@ static void mptcp_retransmit_timer(struct timer_list *t)
22202261 sock_put (sk );
22212262}
22222263
2223- static void mptcp_timeout_timer (struct timer_list * t )
2264+ static void mptcp_tout_timer (struct timer_list * t )
22242265{
22252266 struct sock * sk = from_timer (sk , t , sk_timer );
22262267
@@ -2329,18 +2370,14 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
23292370 bool dispose_it , need_push = false;
23302371
23312372 /* If the first subflow moved to a close state before accept, e.g. due
2332- * to an incoming reset, mptcp either:
2333- * - if either the subflow or the msk are dead, destroy the context
2334- * (the subflow socket is deleted by inet_child_forget) and the msk
2335- * - otherwise do nothing at the moment and take action at accept and/or
2336- * listener shutdown - user-space must be able to accept() the closed
2337- * socket.
2373+ * to an incoming reset or listener shutdown, the subflow socket is
2374+ * already deleted by inet_child_forget() and the mptcp socket can't
2375+ * survive too.
23382376 */
2339- if (msk -> in_accept_queue && msk -> first == ssk ) {
2340- if (!sock_flag (sk , SOCK_DEAD ) && !sock_flag (ssk , SOCK_DEAD ))
2341- return ;
2342-
2377+ if (msk -> in_accept_queue && msk -> first == ssk &&
2378+ (sock_flag (sk , SOCK_DEAD ) || sock_flag (ssk , SOCK_DEAD ))) {
23432379 /* ensure later check in mptcp_worker() will dispose the msk */
2380+ mptcp_set_close_tout (sk , tcp_jiffies32 - (TCP_TIMEWAIT_LEN + 1 ));
23442381 sock_set_flag (sk , SOCK_DEAD );
23452382 lock_sock_nested (ssk , SINGLE_DEPTH_NESTING );
23462383 mptcp_subflow_drop_ctx (ssk );
@@ -2392,6 +2429,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
23922429 }
23932430
23942431out_release :
2432+ __mptcp_subflow_error_report (sk , ssk );
23952433 release_sock (ssk );
23962434
23972435 sock_put (ssk );
@@ -2402,6 +2440,22 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
24022440out :
24032441 if (need_push )
24042442 __mptcp_push_pending (sk , 0 );
2443+
2444+ /* Catch every 'all subflows closed' scenario, including peers silently
2445+ * closing them, e.g. due to timeout.
2446+ * For established sockets, allow an additional timeout before closing,
2447+ * as the protocol can still create more subflows.
2448+ */
2449+ if (list_is_singular (& msk -> conn_list ) && msk -> first &&
2450+ inet_sk_state_load (msk -> first ) == TCP_CLOSE ) {
2451+ if (sk -> sk_state != TCP_ESTABLISHED ||
2452+ msk -> in_accept_queue || sock_flag (sk , SOCK_DEAD )) {
2453+ inet_sk_state_store (sk , TCP_CLOSE );
2454+ mptcp_close_wake_up (sk );
2455+ } else {
2456+ mptcp_start_tout_timer (sk );
2457+ }
2458+ }
24052459}
24062460
24072461void mptcp_close_ssk (struct sock * sk , struct sock * ssk ,
@@ -2445,23 +2499,14 @@ static void __mptcp_close_subflow(struct sock *sk)
24452499
24462500}
24472501
2448- static bool mptcp_should_close (const struct sock * sk )
2502+ static bool mptcp_close_tout_expired (const struct sock * sk )
24492503{
2450- s32 delta = tcp_jiffies32 - inet_csk (sk )-> icsk_mtup .probe_timestamp ;
2451- struct mptcp_subflow_context * subflow ;
2452-
2453- if (delta >= TCP_TIMEWAIT_LEN || mptcp_sk (sk )-> in_accept_queue )
2454- return true;
2504+ if (!inet_csk (sk )-> icsk_mtup .probe_timestamp ||
2505+ sk -> sk_state == TCP_CLOSE )
2506+ return false;
24552507
2456- /* if all subflows are in closed status don't bother with additional
2457- * timeout
2458- */
2459- mptcp_for_each_subflow (mptcp_sk (sk ), subflow ) {
2460- if (inet_sk_state_load (mptcp_subflow_tcp_sock (subflow )) !=
2461- TCP_CLOSE )
2462- return false;
2463- }
2464- return true;
2508+ return time_after32 (tcp_jiffies32 ,
2509+ inet_csk (sk )-> icsk_mtup .probe_timestamp + TCP_TIMEWAIT_LEN );
24652510}
24662511
24672512static void mptcp_check_fastclose (struct mptcp_sock * msk )
@@ -2588,27 +2633,28 @@ static void __mptcp_retrans(struct sock *sk)
25882633reset_timer :
25892634 mptcp_check_and_set_pending (sk );
25902635
2591- if (!mptcp_timer_pending (sk ))
2592- mptcp_reset_timer (sk );
2636+ if (!mptcp_rtx_timer_pending (sk ))
2637+ mptcp_reset_rtx_timer (sk );
25932638}
25942639
25952640/* schedule the timeout timer for the relevant event: either close timeout
25962641 * or mp_fail timeout. The close timeout takes precedence on the mp_fail one
25972642 */
2598- void mptcp_reset_timeout (struct mptcp_sock * msk , unsigned long fail_tout )
2643+ void mptcp_reset_tout_timer (struct mptcp_sock * msk , unsigned long fail_tout )
25992644{
26002645 struct sock * sk = (struct sock * )msk ;
26012646 unsigned long timeout , close_timeout ;
26022647
2603- if (!fail_tout && !sock_flag (sk , SOCK_DEAD ) )
2648+ if (!fail_tout && !inet_csk (sk ) -> icsk_mtup . probe_timestamp )
26042649 return ;
26052650
2606- close_timeout = inet_csk (sk )-> icsk_mtup .probe_timestamp - tcp_jiffies32 + jiffies + TCP_TIMEWAIT_LEN ;
2651+ close_timeout = inet_csk (sk )-> icsk_mtup .probe_timestamp - tcp_jiffies32 + jiffies +
2652+ TCP_TIMEWAIT_LEN ;
26072653
26082654 /* the close timeout takes precedence on the fail one, and here at least one of
26092655 * them is active
26102656 */
2611- timeout = sock_flag (sk , SOCK_DEAD ) ? close_timeout : fail_tout ;
2657+ timeout = inet_csk (sk ) -> icsk_mtup . probe_timestamp ? close_timeout : fail_tout ;
26122658
26132659 sk_reset_timer (sk , & sk -> sk_timer , timeout );
26142660}
@@ -2627,8 +2673,6 @@ static void mptcp_mp_fail_no_response(struct mptcp_sock *msk)
26272673 mptcp_subflow_reset (ssk );
26282674 WRITE_ONCE (mptcp_subflow_ctx (ssk )-> fail_tout , 0 );
26292675 unlock_sock_fast (ssk , slow );
2630-
2631- mptcp_reset_timeout (msk , 0 );
26322676}
26332677
26342678static void mptcp_do_fastclose (struct sock * sk )
@@ -2665,18 +2709,14 @@ static void mptcp_worker(struct work_struct *work)
26652709 if (test_and_clear_bit (MPTCP_WORK_CLOSE_SUBFLOW , & msk -> flags ))
26662710 __mptcp_close_subflow (sk );
26672711
2668- /* There is no point in keeping around an orphaned sk timedout or
2669- * closed, but we need the msk around to reply to incoming DATA_FIN,
2670- * even if it is orphaned and in FIN_WAIT2 state
2671- */
2672- if (sock_flag (sk , SOCK_DEAD )) {
2673- if (mptcp_should_close (sk ))
2674- mptcp_do_fastclose (sk );
2712+ if (mptcp_close_tout_expired (sk )) {
2713+ mptcp_do_fastclose (sk );
2714+ mptcp_close_wake_up (sk );
2715+ }
26752716
2676- if (sk -> sk_state == TCP_CLOSE ) {
2677- __mptcp_destroy_sock (sk );
2678- goto unlock ;
2679- }
2717+ if (sock_flag (sk , SOCK_DEAD ) && sk -> sk_state == TCP_CLOSE ) {
2718+ __mptcp_destroy_sock (sk );
2719+ goto unlock ;
26802720 }
26812721
26822722 if (test_and_clear_bit (MPTCP_WORK_RTX , & msk -> flags ))
@@ -2717,7 +2757,7 @@ static void __mptcp_init_sock(struct sock *sk)
27172757
27182758 /* re-use the csk retrans timer for MPTCP-level retrans */
27192759 timer_setup (& msk -> sk .icsk_retransmit_timer , mptcp_retransmit_timer , 0 );
2720- timer_setup (& sk -> sk_timer , mptcp_timeout_timer , 0 );
2760+ timer_setup (& sk -> sk_timer , mptcp_tout_timer , 0 );
27212761}
27222762
27232763static void mptcp_ca_reset (struct sock * sk )
@@ -2808,8 +2848,8 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how)
28082848 } else {
28092849 pr_debug ("Sending DATA_FIN on subflow %p" , ssk );
28102850 tcp_send_ack (ssk );
2811- if (!mptcp_timer_pending (sk ))
2812- mptcp_reset_timer (sk );
2851+ if (!mptcp_rtx_timer_pending (sk ))
2852+ mptcp_reset_rtx_timer (sk );
28132853 }
28142854 break ;
28152855 }
@@ -2892,7 +2932,7 @@ static void __mptcp_destroy_sock(struct sock *sk)
28922932
28932933 might_sleep ();
28942934
2895- mptcp_stop_timer (sk );
2935+ mptcp_stop_rtx_timer (sk );
28962936 sk_stop_timer (sk , & sk -> sk_timer );
28972937 msk -> pm .status = 0 ;
28982938 mptcp_release_sched (msk );
@@ -2975,7 +3015,6 @@ bool __mptcp_close(struct sock *sk, long timeout)
29753015
29763016cleanup :
29773017 /* orphan all the subflows */
2978- inet_csk (sk )-> icsk_mtup .probe_timestamp = tcp_jiffies32 ;
29793018 mptcp_for_each_subflow (msk , subflow ) {
29803019 struct sock * ssk = mptcp_subflow_tcp_sock (subflow );
29813020 bool slow = lock_sock_fast_nested (ssk );
@@ -3012,7 +3051,7 @@ bool __mptcp_close(struct sock *sk, long timeout)
30123051 __mptcp_destroy_sock (sk );
30133052 do_cancel_work = true;
30143053 } else {
3015- mptcp_reset_timeout ( msk , 0 );
3054+ mptcp_start_tout_timer ( sk );
30163055 }
30173056
30183057 return do_cancel_work ;
@@ -3075,8 +3114,8 @@ static int mptcp_disconnect(struct sock *sk, int flags)
30753114 mptcp_check_listen_stop (sk );
30763115 inet_sk_state_store (sk , TCP_CLOSE );
30773116
3078- mptcp_stop_timer (sk );
3079- sk_stop_timer (sk , & sk -> sk_timer );
3117+ mptcp_stop_rtx_timer (sk );
3118+ mptcp_stop_tout_timer (sk );
30803119
30813120 if (msk -> token )
30823121 mptcp_event (MPTCP_EVENT_CLOSED , msk , NULL , GFP_KERNEL );
0 commit comments