Skip to content

Commit 415e636

Browse files
fmaurer-rhPaolo Abeni
authored andcommitted
hsr: Implement more robust duplicate discard for PRP
The PRP duplicate discard algorithm does not work reliably with certain link faults. Especially with packet loss on one link, the duplicate discard algorithm drops valid packets which leads to packet loss on the PRP interface where the link fault should in theory be perfectly recoverable by PRP. This happens because the algorithm opens the drop window on the lossy link, covering received and lost sequence numbers. If the other, non-lossy link receives the duplicate for a lost frame, it is within the drop window of the lossy link and therefore dropped. Since IEC 62439-3:2012, a node has one sequence number counter for frames it sends, instead of one sequence number counter for each destination. Therefore, a node can not expect to receive contiguous sequence numbers from a sender. A missing sequence number can be totally normal (if the sender intermittently communicates with another node) or mean a frame was lost. The algorithm, as previously implemented in commit 05fd00e ("net: hsr: Fix PRP duplicate detection"), was part of IEC 62439-3:2010 (HSRv0/PRPv0) but was removed with IEC 62439-3:2012 (HSRv1/PRPv1). Since that, no algorithm is specified but up to implementers. It should be "designed such that it never rejects a legitimate frame, while occasional acceptance of a duplicate can be tolerated" (IEC 62439-3:2021). For the duplicate discard algorithm, this means that 1) we need to track the sequence numbers individually to account for non-contiguous sequence numbers, and 2) we should always err on the side of accepting a duplicate than dropping a valid frame. The idea of the new algorithm is to store the seen sequence numbers in a bitmap. To keep the size of the bitmap in control, we store it as a "sparse bitmap" where the bitmap is split into blocks and not all blocks exist at the same time. The sparse bitmap is implemented using an xarray that keeps the references to the individual blocks and a backing ring buffer that stores the actual blocks. New blocks are initialized in the buffer and added to the xarray as needed when new frames arrive. Existing blocks are removed in two conditions: 1. The block found for an arriving sequence number is old and therefore not relevant to the duplicate discard algorithm anymore, i.e., it has been added more than the entry forget time ago. In this case, the block is removed from the xarray and marked as forgotten (by setting its timestamp to 0). 2. Space is needed in the ring buffer for a new block. In this case, the block is removed from the xarray, if it hasn't already been forgotten (by 1.). Afterwards, the new block is initialized in its place. This has the nice property that we can reliably track sequence numbers on low traffic situations (where they expire based on their timestamp) and more quickly forget sequence numbers in high traffic situations before they potentially wrap over and repeat before they are expired. When nodes are merged, the blocks are merged as well. The timestamp of a merged block is set to the minimum of the two timestamps to never keep around a seen sequence number for too long. The bitmaps are or'd to mark all seen sequence numbers as seen. All of this still happens under seq_out_lock, to prevent concurrent access to the blocks. The KUnit test for the algorithm is updated as well. The updates are done in a way to match the original intends pretty closely. Currently, there is much knowledge about the actual algorithm baked into the tests (especially the expectations) which may need some redesign in the future. Reported-by: Steffen Lindner <steffen.lindner@de.abb.com> Signed-off-by: Felix Maurer <fmaurer@redhat.com> Reviewed-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Tested-by: Steffen Lindner <steffen.lindner@de.abb.com> Link: https://patch.msgid.link/8ce15a996099df2df5b700969a39e7df400e8dbb.1770299429.git.fmaurer@redhat.com Signed-off-by: Paolo Abeni <pabeni@redhat.com>
1 parent ca4a09a commit 415e636

3 files changed

Lines changed: 237 additions & 138 deletions

File tree

net/hsr/hsr_framereg.c

Lines changed: 139 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
* Same code handles filtering of duplicates for PRP as well.
1212
*/
1313

14+
#include <kunit/visibility.h>
1415
#include <linux/if_ether.h>
1516
#include <linux/etherdevice.h>
1617
#include <linux/slab.h>
@@ -35,7 +36,6 @@ static bool seq_nr_after(u16 a, u16 b)
3536

3637
#define seq_nr_before(a, b) seq_nr_after((b), (a))
3738
#define seq_nr_before_or_eq(a, b) (!seq_nr_after((a), (b)))
38-
#define PRP_DROP_WINDOW_LEN 32768
3939

4040
bool hsr_addr_is_redbox(struct hsr_priv *hsr, unsigned char *addr)
4141
{
@@ -126,13 +126,29 @@ void hsr_del_self_node(struct hsr_priv *hsr)
126126
kfree_rcu(old, rcu_head);
127127
}
128128

129+
static void hsr_free_node(struct hsr_node *node)
130+
{
131+
xa_destroy(&node->seq_blocks);
132+
kfree(node->block_buf);
133+
kfree(node);
134+
}
135+
136+
static void hsr_free_node_rcu(struct rcu_head *rn)
137+
{
138+
struct hsr_node *node = container_of(rn, struct hsr_node, rcu_head);
139+
140+
hsr_free_node(node);
141+
}
142+
129143
void hsr_del_nodes(struct list_head *node_db)
130144
{
131145
struct hsr_node *node;
132146
struct hsr_node *tmp;
133147

134-
list_for_each_entry_safe(node, tmp, node_db, mac_list)
135-
kfree(node);
148+
list_for_each_entry_safe(node, tmp, node_db, mac_list) {
149+
list_del(&node->mac_list);
150+
hsr_free_node(node);
151+
}
136152
}
137153

138154
void prp_handle_san_frame(bool san, enum hsr_port_type port,
@@ -158,7 +174,7 @@ static struct hsr_node *hsr_add_node(struct hsr_priv *hsr,
158174
u16 seq_out, bool san,
159175
enum hsr_port_type rx_port)
160176
{
161-
struct hsr_node *new_node, *node;
177+
struct hsr_node *new_node, *node = NULL;
162178
unsigned long now;
163179
int i;
164180

@@ -169,18 +185,22 @@ static struct hsr_node *hsr_add_node(struct hsr_priv *hsr,
169185
ether_addr_copy(new_node->macaddress_A, addr);
170186
spin_lock_init(&new_node->seq_out_lock);
171187

188+
new_node->block_buf = kcalloc(HSR_MAX_SEQ_BLOCKS,
189+
sizeof(struct hsr_seq_block),
190+
GFP_ATOMIC);
191+
if (!new_node->block_buf)
192+
goto free;
193+
194+
xa_init(&new_node->seq_blocks);
195+
172196
/* We are only interested in time diffs here, so use current jiffies
173197
* as initialization. (0 could trigger an spurious ring error warning).
174198
*/
175199
now = jiffies;
176200
for (i = 0; i < HSR_PT_PORTS; i++) {
177201
new_node->time_in[i] = now;
178202
new_node->time_out[i] = now;
179-
}
180-
for (i = 0; i < HSR_PT_PORTS; i++) {
181203
new_node->seq_out[i] = seq_out;
182-
new_node->seq_expected[i] = seq_out + 1;
183-
new_node->seq_start[i] = seq_out + 1;
184204
}
185205

186206
if (san && hsr->proto_ops->handle_san_frame)
@@ -199,6 +219,8 @@ static struct hsr_node *hsr_add_node(struct hsr_priv *hsr,
199219
return new_node;
200220
out:
201221
spin_unlock_bh(&hsr->list_lock);
222+
kfree(new_node->block_buf);
223+
free:
202224
kfree(new_node);
203225
return node;
204226
}
@@ -280,6 +302,62 @@ struct hsr_node *hsr_get_node(struct hsr_port *port, struct list_head *node_db,
280302
san, rx_port);
281303
}
282304

305+
static bool hsr_seq_block_is_old(struct hsr_seq_block *block)
306+
{
307+
unsigned long expiry = msecs_to_jiffies(HSR_ENTRY_FORGET_TIME);
308+
309+
return time_is_before_jiffies(block->time + expiry);
310+
}
311+
312+
static void hsr_forget_seq_block(struct hsr_node *node,
313+
struct hsr_seq_block *block)
314+
{
315+
if (block->time)
316+
xa_erase(&node->seq_blocks, block->block_idx);
317+
block->time = 0;
318+
}
319+
320+
/* Get the currently active sequence number block. If there is no block yet, or
321+
* the existing one is expired, a new block is created. The idea is to maintain
322+
* a "sparse bitmap" where a bitmap for the whole sequence number space is
323+
* split into blocks and not all blocks exist all the time. The blocks can
324+
* expire after time (in low traffic situations) or when they are replaced in
325+
* the backing fixed size buffer (in high traffic situations).
326+
*/
327+
VISIBLE_IF_KUNIT struct hsr_seq_block *hsr_get_seq_block(struct hsr_node *node,
328+
u16 block_idx)
329+
{
330+
struct hsr_seq_block *block, *res;
331+
332+
block = xa_load(&node->seq_blocks, block_idx);
333+
334+
if (block && hsr_seq_block_is_old(block)) {
335+
hsr_forget_seq_block(node, block);
336+
block = NULL;
337+
}
338+
339+
if (!block) {
340+
block = &node->block_buf[node->next_block];
341+
hsr_forget_seq_block(node, block);
342+
343+
memset(block, 0, sizeof(*block));
344+
block->time = jiffies;
345+
block->block_idx = block_idx;
346+
347+
res = xa_store(&node->seq_blocks, block_idx, block, GFP_ATOMIC);
348+
if (xa_is_err(res)) {
349+
block->time = 0;
350+
return NULL;
351+
}
352+
353+
node->next_block =
354+
(node->next_block + 1) & (HSR_MAX_SEQ_BLOCKS - 1);
355+
}
356+
357+
return block;
358+
}
359+
EXPORT_SYMBOL_IF_KUNIT(hsr_get_seq_block);
360+
283361
/* Use the Supervision frame's info about an eventual macaddress_B for merging
284362
* nodes that has previously had their macaddress_B registered as a separate
285363
* node.
@@ -288,16 +366,18 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame)
288366
{
289367
struct hsr_node *node_curr = frame->node_src;
290368
struct hsr_port *port_rcv = frame->port_rcv;
369+
struct hsr_seq_block *src_blk, *merge_blk;
291370
struct hsr_priv *hsr = port_rcv->hsr;
292-
struct hsr_sup_payload *hsr_sp;
293371
struct hsr_sup_tlv *hsr_sup_tlv;
372+
struct hsr_sup_payload *hsr_sp;
294373
struct hsr_node *node_real;
295374
struct sk_buff *skb = NULL;
296375
struct list_head *node_db;
297376
struct ethhdr *ethhdr;
298-
int i;
299-
unsigned int pull_size = 0;
300377
unsigned int total_pull_size = 0;
378+
unsigned int pull_size = 0;
379+
unsigned long idx;
380+
int i;
301381

302382
/* Here either frame->skb_hsr or frame->skb_prp should be
303383
* valid as supervision frame always will have protocol
@@ -391,14 +471,26 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame)
391471
if (seq_nr_after(node_curr->seq_out[i], node_real->seq_out[i]))
392472
node_real->seq_out[i] = node_curr->seq_out[i];
393473
}
474+
475+
xa_for_each(&node_curr->seq_blocks, idx, src_blk) {
476+
if (hsr_seq_block_is_old(src_blk))
477+
continue;
478+
479+
merge_blk = hsr_get_seq_block(node_real, src_blk->block_idx);
480+
if (!merge_blk)
481+
continue;
482+
merge_blk->time = min(merge_blk->time, src_blk->time);
483+
bitmap_or(merge_blk->seq_nrs, merge_blk->seq_nrs,
484+
src_blk->seq_nrs, HSR_SEQ_BLOCK_SIZE);
485+
}
394486
spin_unlock_bh(&node_real->seq_out_lock);
395487
node_real->addr_B_port = port_rcv->type;
396488

397489
spin_lock_bh(&hsr->list_lock);
398490
if (!node_curr->removed) {
399491
list_del_rcu(&node_curr->mac_list);
400492
node_curr->removed = true;
401-
kfree_rcu(node_curr, rcu_head);
493+
call_rcu(&node_curr->rcu_head, hsr_free_node_rcu);
402494
}
403495
spin_unlock_bh(&hsr->list_lock);
404496

@@ -505,17 +597,12 @@ int hsr_register_frame_out(struct hsr_port *port, struct hsr_frame_info *frame)
505597
return 0;
506598
}
507599

508-
/* Adaptation of the PRP duplicate discard algorithm described in wireshark
509-
* wiki (https://wiki.wireshark.org/PRP)
510-
*
511-
* A drop window is maintained for both LANs with start sequence set to the
512-
* first sequence accepted on the LAN that has not been seen on the other LAN,
513-
* and expected sequence set to the latest received sequence number plus one.
514-
*
515-
* When a frame is received on either LAN it is compared against the received
516-
* frames on the other LAN. If it is outside the drop window of the other LAN
517-
* the frame is accepted and the drop window is updated.
518-
* The drop window for the other LAN is reset.
600+
/* PRP duplicate discard algorithm: we maintain a bitmap where we set a bit for
601+
* every seen sequence number. The bitmap is split into blocks and the block
602+
* management is detailed in hsr_get_seq_block(). In any case, we err on the
603+
* side of accepting a packet, as the specification requires the algorithm to
604+
* be "designed such that it never rejects a legitimate frame, while occasional
605+
* acceptance of a duplicate can be tolerated." (IEC 62439-3:2021, 4.1.10.3).
519606
*
520607
* 'port' is the outgoing interface
521608
* 'frame' is the frame to be sent
@@ -526,71 +613,50 @@ int hsr_register_frame_out(struct hsr_port *port, struct hsr_frame_info *frame)
526613
*/
527614
int prp_register_frame_out(struct hsr_port *port, struct hsr_frame_info *frame)
528615
{
529-
enum hsr_port_type other_port;
530-
enum hsr_port_type rcv_port;
616+
u16 sequence_nr, seq_bit, block_idx;
617+
struct hsr_seq_block *block;
531618
struct hsr_node *node;
532-
u16 sequence_diff;
533-
u16 sequence_exp;
534-
u16 sequence_nr;
535619

536-
/* out-going frames are always in order
537-
* and can be checked the same way as for HSR
538-
*/
539-
if (frame->port_rcv->type == HSR_PT_MASTER)
540-
return hsr_register_frame_out(port, frame);
620+
node = frame->node_src;
621+
sequence_nr = frame->sequence_nr;
622+
623+
/* out-going frames are always in order */
624+
if (frame->port_rcv->type == HSR_PT_MASTER) {
625+
spin_lock_bh(&node->seq_out_lock);
626+
node->time_out[port->type] = jiffies;
627+
node->seq_out[port->type] = sequence_nr;
628+
spin_unlock_bh(&node->seq_out_lock);
629+
return 0;
630+
}
541631

542632
/* for PRP we should only forward frames from the slave ports
543633
* to the master port
544634
*/
545635
if (port->type != HSR_PT_MASTER)
546636
return 1;
547637

548-
node = frame->node_src;
549-
sequence_nr = frame->sequence_nr;
550-
sequence_exp = sequence_nr + 1;
551-
rcv_port = frame->port_rcv->type;
552-
other_port = rcv_port == HSR_PT_SLAVE_A ? HSR_PT_SLAVE_B :
553-
HSR_PT_SLAVE_A;
554-
555638
spin_lock_bh(&node->seq_out_lock);
556-
if (time_is_before_jiffies(node->time_out[port->type] +
557-
msecs_to_jiffies(HSR_ENTRY_FORGET_TIME)) ||
558-
(node->seq_start[rcv_port] == node->seq_expected[rcv_port] &&
559-
node->seq_start[other_port] == node->seq_expected[other_port])) {
560-
/* the node hasn't been sending for a while
561-
* or both drop windows are empty, forward the frame
562-
*/
563-
node->seq_start[rcv_port] = sequence_nr;
564-
} else if (seq_nr_before(sequence_nr, node->seq_expected[other_port]) &&
565-
seq_nr_before_or_eq(node->seq_start[other_port], sequence_nr)) {
566-
/* drop the frame, update the drop window for the other port
567-
* and reset our drop window
568-
*/
569-
node->seq_start[other_port] = sequence_exp;
570-
node->seq_expected[rcv_port] = sequence_exp;
571-
node->seq_start[rcv_port] = node->seq_expected[rcv_port];
572-
spin_unlock_bh(&node->seq_out_lock);
573-
return 1;
574-
}
575639

576-
/* update the drop window for the port where this frame was received
577-
* and clear the drop window for the other port
578-
*/
579-
node->seq_start[other_port] = node->seq_expected[other_port];
580-
node->seq_expected[rcv_port] = sequence_exp;
581-
sequence_diff = sequence_exp - node->seq_start[rcv_port];
582-
if (sequence_diff > PRP_DROP_WINDOW_LEN)
583-
node->seq_start[rcv_port] = sequence_exp - PRP_DROP_WINDOW_LEN;
640+
block_idx = hsr_seq_block_index(sequence_nr);
641+
block = hsr_get_seq_block(node, block_idx);
642+
if (!block)
643+
goto out_new;
644+
645+
seq_bit = hsr_seq_block_bit(sequence_nr);
646+
if (__test_and_set_bit(seq_bit, block->seq_nrs))
647+
goto out_seen;
584648

585649
node->time_out[port->type] = jiffies;
586650
node->seq_out[port->type] = sequence_nr;
651+
out_new:
587652
spin_unlock_bh(&node->seq_out_lock);
588653
return 0;
589-
}
590654

591-
#if IS_MODULE(CONFIG_PRP_DUP_DISCARD_KUNIT_TEST)
592-
EXPORT_SYMBOL(prp_register_frame_out);
593-
#endif
655+
out_seen:
656+
spin_unlock_bh(&node->seq_out_lock);
657+
return 1;
658+
}
659+
EXPORT_SYMBOL_IF_KUNIT(prp_register_frame_out);
594660

595661
static struct hsr_port *get_late_port(struct hsr_priv *hsr,
596662
struct hsr_node *node)
@@ -672,7 +738,7 @@ void hsr_prune_nodes(struct timer_list *t)
672738
list_del_rcu(&node->mac_list);
673739
node->removed = true;
674740
/* Note that we need to free this entry later: */
675-
kfree_rcu(node, rcu_head);
741+
call_rcu(&node->rcu_head, hsr_free_node_rcu);
676742
}
677743
}
678744
}
@@ -706,7 +772,7 @@ void hsr_prune_proxy_nodes(struct timer_list *t)
706772
list_del_rcu(&node->mac_list);
707773
node->removed = true;
708774
/* Note that we need to free this entry later: */
709-
kfree_rcu(node, rcu_head);
775+
call_rcu(&node->rcu_head, hsr_free_node_rcu);
710776
}
711777
}
712778
}

net/hsr/hsr_framereg.h

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -74,9 +74,27 @@ bool hsr_is_node_in_db(struct list_head *node_db,
7474

7575
int prp_register_frame_out(struct hsr_port *port, struct hsr_frame_info *frame);
7676

77+
#if IS_ENABLED(CONFIG_KUNIT)
78+
struct hsr_seq_block *hsr_get_seq_block(struct hsr_node *node, u16 block_idx);
79+
#endif
80+
81+
#define HSR_SEQ_BLOCK_SHIFT 7 /* 128 bits */
82+
#define HSR_SEQ_BLOCK_SIZE (1 << HSR_SEQ_BLOCK_SHIFT)
83+
#define HSR_SEQ_BLOCK_MASK (HSR_SEQ_BLOCK_SIZE - 1)
84+
#define HSR_MAX_SEQ_BLOCKS 64
85+
86+
#define hsr_seq_block_index(sequence_nr) ((sequence_nr) >> HSR_SEQ_BLOCK_SHIFT)
87+
#define hsr_seq_block_bit(sequence_nr) ((sequence_nr) & HSR_SEQ_BLOCK_MASK)
88+
89+
struct hsr_seq_block {
90+
unsigned long time;
91+
u16 block_idx;
92+
DECLARE_BITMAP(seq_nrs, HSR_SEQ_BLOCK_SIZE);
93+
};
94+
7795
struct hsr_node {
7896
struct list_head mac_list;
79-
/* Protect R/W access to seq_out */
97+
/* Protect R/W access to seq_out and seq_blocks */
8098
spinlock_t seq_out_lock;
8199
unsigned char macaddress_A[ETH_ALEN];
82100
unsigned char macaddress_B[ETH_ALEN];
@@ -91,8 +109,9 @@ struct hsr_node {
91109
u16 seq_out[HSR_PT_PORTS];
92110
bool removed;
93111
/* PRP specific duplicate handling */
94-
u16 seq_expected[HSR_PT_PORTS];
95-
u16 seq_start[HSR_PT_PORTS];
112+
struct xarray seq_blocks;
113+
struct hsr_seq_block *block_buf;
114+
unsigned int next_block;
96115
struct rcu_head rcu_head;
97116
};
98117

0 commit comments

Comments
 (0)