Skip to content

Commit 00cbce5

Browse files
Patrick Kelseyrleon
authored andcommitted
IB/hfi1: Fix bugs with non-PAGE_SIZE-end multi-iovec user SDMA requests
hfi1 user SDMA request processing has two bugs that can cause data corruption for user SDMA requests that have multiple payload iovecs where an iovec other than the tail iovec does not run up to the page boundary for the buffer pointed to by that iovec.a Here are the specific bugs: 1. user_sdma_txadd() does not use struct user_sdma_iovec->iov.iov_len. Rather, user_sdma_txadd() will add up to PAGE_SIZE bytes from iovec to the packet, even if some of those bytes are past iovec->iov.iov_len and are thus not intended to be in the packet. 2. user_sdma_txadd() and user_sdma_send_pkts() fail to advance to the next iovec in user_sdma_request->iovs when the current iovec is not PAGE_SIZE and does not contain enough data to complete the packet. The transmitted packet will contain the wrong data from the iovec pages. This has not been an issue with SDMA packets from hfi1 Verbs or PSM2 because they only produce iovecs that end short of PAGE_SIZE as the tail iovec of an SDMA request. Fixing these bugs exposes other bugs with the SDMA pin cache (struct mmu_rb_handler) that get in way of supporting user SDMA requests with multiple payload iovecs whose buffers do not end at PAGE_SIZE. So this commit fixes those issues as well. Here are the mmu_rb_handler bugs that non-PAGE_SIZE-end multi-iovec payload user SDMA requests can hit: 1. Overlapping memory ranges in mmu_rb_handler will result in duplicate pinnings. 2. When extending an existing mmu_rb_handler entry (struct mmu_rb_node), the mmu_rb code (1) removes the existing entry under a lock, (2) releases that lock, pins the new pages, (3) then reacquires the lock to insert the extended mmu_rb_node. If someone else comes in and inserts an overlapping entry between (2) and (3), insert in (3) will fail. The failure path code in this case unpins _all_ pages in either the original mmu_rb_node or the new mmu_rb_node that was inserted between (2) and (3). 3. In hfi1_mmu_rb_remove_unless_exact(), mmu_rb_node->refcount is incremented outside of mmu_rb_handler->lock. As a result, mmu_rb_node could be evicted by another thread that gets mmu_rb_handler->lock and checks mmu_rb_node->refcount before mmu_rb_node->refcount is incremented. 4. Related to #2 above, SDMA request submission failure path does not check mmu_rb_node->refcount before freeing mmu_rb_node object. If there are other SDMA requests in progress whose iovecs have pointers to the now-freed mmu_rb_node(s), those pointers to the now-freed mmu_rb nodes will be dereferenced when those SDMA requests complete. Fixes: 7be8567 ("IB/hfi1: Don't remove RB entry when not needed.") Fixes: 7724105 ("IB/hfi1: add driver files") Signed-off-by: Brendan Cunningham <bcunningham@cornelisnetworks.com> Signed-off-by: Patrick Kelsey <pat.kelsey@cornelisnetworks.com> Signed-off-by: Dennis Dalessandro <dennis.dalessandro@cornelisnetworks.com> Link: https://lore.kernel.org/r/168088636445.3027109.10054635277810177889.stgit@252.162.96.66.static.eigbox.net Signed-off-by: Leon Romanovsky <leon@kernel.org>
1 parent 9fe8fec commit 00cbce5

11 files changed

Lines changed: 423 additions & 304 deletions

File tree

drivers/infiniband/hw/hfi1/ipoib_tx.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,7 @@ static int hfi1_ipoib_build_ulp_payload(struct ipoib_txreq *tx,
215215
const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
216216

217217
ret = sdma_txadd_page(dd,
218+
NULL,
218219
txreq,
219220
skb_frag_page(frag),
220221
frag->bv_offset,

drivers/infiniband/hw/hfi1/mmu_rb.c

Lines changed: 14 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler,
126126
spin_lock_irqsave(&handler->lock, flags);
127127
node = __mmu_rb_search(handler, mnode->addr, mnode->len);
128128
if (node) {
129-
ret = -EINVAL;
129+
ret = -EEXIST;
130130
goto unlock;
131131
}
132132
__mmu_int_rb_insert(mnode, &handler->root);
@@ -143,6 +143,19 @@ int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler,
143143
return ret;
144144
}
145145

146+
/* Caller must hold handler lock */
147+
struct mmu_rb_node *hfi1_mmu_rb_get_first(struct mmu_rb_handler *handler,
148+
unsigned long addr, unsigned long len)
149+
{
150+
struct mmu_rb_node *node;
151+
152+
trace_hfi1_mmu_rb_search(addr, len);
153+
node = __mmu_int_rb_iter_first(&handler->root, addr, (addr + len) - 1);
154+
if (node)
155+
list_move_tail(&node->list, &handler->lru_list);
156+
return node;
157+
}
158+
146159
/* Caller must hold handler lock */
147160
static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler,
148161
unsigned long addr,
@@ -167,34 +180,6 @@ static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler,
167180
return node;
168181
}
169182

170-
bool hfi1_mmu_rb_remove_unless_exact(struct mmu_rb_handler *handler,
171-
unsigned long addr, unsigned long len,
172-
struct mmu_rb_node **rb_node)
173-
{
174-
struct mmu_rb_node *node;
175-
unsigned long flags;
176-
bool ret = false;
177-
178-
if (current->mm != handler->mn.mm)
179-
return ret;
180-
181-
spin_lock_irqsave(&handler->lock, flags);
182-
node = __mmu_rb_search(handler, addr, len);
183-
if (node) {
184-
if (node->addr == addr && node->len == len) {
185-
list_move_tail(&node->list, &handler->lru_list);
186-
goto unlock;
187-
}
188-
__mmu_int_rb_remove(node, &handler->root);
189-
list_del(&node->list); /* remove from LRU list */
190-
ret = true;
191-
}
192-
unlock:
193-
spin_unlock_irqrestore(&handler->lock, flags);
194-
*rb_node = node;
195-
return ret;
196-
}
197-
198183
void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg)
199184
{
200185
struct mmu_rb_node *rbnode, *ptr;
@@ -225,29 +210,6 @@ void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg)
225210
}
226211
}
227212

228-
/*
229-
* It is up to the caller to ensure that this function does not race with the
230-
* mmu invalidate notifier which may be calling the users remove callback on
231-
* 'node'.
232-
*/
233-
void hfi1_mmu_rb_remove(struct mmu_rb_handler *handler,
234-
struct mmu_rb_node *node)
235-
{
236-
unsigned long flags;
237-
238-
if (current->mm != handler->mn.mm)
239-
return;
240-
241-
/* Validity of handler and node pointers has been checked by caller. */
242-
trace_hfi1_mmu_rb_remove(node->addr, node->len);
243-
spin_lock_irqsave(&handler->lock, flags);
244-
__mmu_int_rb_remove(node, &handler->root);
245-
list_del(&node->list); /* remove from LRU list */
246-
spin_unlock_irqrestore(&handler->lock, flags);
247-
248-
handler->ops->remove(handler->ops_arg, node);
249-
}
250-
251213
static int mmu_notifier_range_start(struct mmu_notifier *mn,
252214
const struct mmu_notifier_range *range)
253215
{

drivers/infiniband/hw/hfi1/mmu_rb.h

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -52,10 +52,8 @@ void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler);
5252
int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler,
5353
struct mmu_rb_node *mnode);
5454
void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg);
55-
void hfi1_mmu_rb_remove(struct mmu_rb_handler *handler,
56-
struct mmu_rb_node *mnode);
57-
bool hfi1_mmu_rb_remove_unless_exact(struct mmu_rb_handler *handler,
58-
unsigned long addr, unsigned long len,
59-
struct mmu_rb_node **rb_node);
55+
struct mmu_rb_node *hfi1_mmu_rb_get_first(struct mmu_rb_handler *handler,
56+
unsigned long addr,
57+
unsigned long len);
6058

6159
#endif /* _HFI1_MMU_RB_H */

drivers/infiniband/hw/hfi1/sdma.c

Lines changed: 4 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1593,22 +1593,7 @@ static inline void sdma_unmap_desc(
15931593
struct hfi1_devdata *dd,
15941594
struct sdma_desc *descp)
15951595
{
1596-
switch (sdma_mapping_type(descp)) {
1597-
case SDMA_MAP_SINGLE:
1598-
dma_unmap_single(
1599-
&dd->pcidev->dev,
1600-
sdma_mapping_addr(descp),
1601-
sdma_mapping_len(descp),
1602-
DMA_TO_DEVICE);
1603-
break;
1604-
case SDMA_MAP_PAGE:
1605-
dma_unmap_page(
1606-
&dd->pcidev->dev,
1607-
sdma_mapping_addr(descp),
1608-
sdma_mapping_len(descp),
1609-
DMA_TO_DEVICE);
1610-
break;
1611-
}
1596+
system_descriptor_complete(dd, descp);
16121597
}
16131598

16141599
/*
@@ -3128,7 +3113,7 @@ int ext_coal_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx,
31283113

31293114
/* Add descriptor for coalesce buffer */
31303115
tx->desc_limit = MAX_DESC;
3131-
return _sdma_txadd_daddr(dd, SDMA_MAP_SINGLE, tx,
3116+
return _sdma_txadd_daddr(dd, SDMA_MAP_SINGLE, NULL, tx,
31323117
addr, tx->tlen);
31333118
}
31343119

@@ -3167,10 +3152,12 @@ int _pad_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx)
31673152
return rval;
31683153
}
31693154
}
3155+
31703156
/* finish the one just added */
31713157
make_tx_sdma_desc(
31723158
tx,
31733159
SDMA_MAP_NONE,
3160+
NULL,
31743161
dd->sdma_pad_phys,
31753162
sizeof(u32) - (tx->packet_len & (sizeof(u32) - 1)));
31763163
tx->num_desc++;

drivers/infiniband/hw/hfi1/sdma.h

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -594,6 +594,7 @@ static inline dma_addr_t sdma_mapping_addr(struct sdma_desc *d)
594594
static inline void make_tx_sdma_desc(
595595
struct sdma_txreq *tx,
596596
int type,
597+
void *pinning_ctx,
597598
dma_addr_t addr,
598599
size_t len)
599600
{
@@ -612,6 +613,7 @@ static inline void make_tx_sdma_desc(
612613
<< SDMA_DESC0_PHY_ADDR_SHIFT) |
613614
(((u64)len & SDMA_DESC0_BYTE_COUNT_MASK)
614615
<< SDMA_DESC0_BYTE_COUNT_SHIFT);
616+
desc->pinning_ctx = pinning_ctx;
615617
}
616618

617619
/* helper to extend txreq */
@@ -643,6 +645,7 @@ static inline void _sdma_close_tx(struct hfi1_devdata *dd,
643645
static inline int _sdma_txadd_daddr(
644646
struct hfi1_devdata *dd,
645647
int type,
648+
void *pinning_ctx,
646649
struct sdma_txreq *tx,
647650
dma_addr_t addr,
648651
u16 len)
@@ -652,6 +655,7 @@ static inline int _sdma_txadd_daddr(
652655
make_tx_sdma_desc(
653656
tx,
654657
type,
658+
pinning_ctx,
655659
addr, len);
656660
WARN_ON(len > tx->tlen);
657661
tx->num_desc++;
@@ -672,6 +676,7 @@ static inline int _sdma_txadd_daddr(
672676
/**
673677
* sdma_txadd_page() - add a page to the sdma_txreq
674678
* @dd: the device to use for mapping
679+
* @pinning_ctx: context to be released at descriptor retirement
675680
* @tx: tx request to which the page is added
676681
* @page: page to map
677682
* @offset: offset within the page
@@ -687,6 +692,7 @@ static inline int _sdma_txadd_daddr(
687692
*/
688693
static inline int sdma_txadd_page(
689694
struct hfi1_devdata *dd,
695+
void *pinning_ctx,
690696
struct sdma_txreq *tx,
691697
struct page *page,
692698
unsigned long offset,
@@ -714,8 +720,7 @@ static inline int sdma_txadd_page(
714720
return -ENOSPC;
715721
}
716722

717-
return _sdma_txadd_daddr(
718-
dd, SDMA_MAP_PAGE, tx, addr, len);
723+
return _sdma_txadd_daddr(dd, SDMA_MAP_PAGE, pinning_ctx, tx, addr, len);
719724
}
720725

721726
/**
@@ -749,7 +754,8 @@ static inline int sdma_txadd_daddr(
749754
return rval;
750755
}
751756

752-
return _sdma_txadd_daddr(dd, SDMA_MAP_NONE, tx, addr, len);
757+
return _sdma_txadd_daddr(dd, SDMA_MAP_NONE, NULL, tx,
758+
addr, len);
753759
}
754760

755761
/**
@@ -795,8 +801,7 @@ static inline int sdma_txadd_kvaddr(
795801
return -ENOSPC;
796802
}
797803

798-
return _sdma_txadd_daddr(
799-
dd, SDMA_MAP_SINGLE, tx, addr, len);
804+
return _sdma_txadd_daddr(dd, SDMA_MAP_SINGLE, NULL, tx, addr, len);
800805
}
801806

802807
struct iowait_work;
@@ -1030,4 +1035,5 @@ extern uint mod_num_sdma;
10301035

10311036
void sdma_update_lmc(struct hfi1_devdata *dd, u64 mask, u32 lid);
10321037

1038+
void system_descriptor_complete(struct hfi1_devdata *dd, struct sdma_desc *descp);
10331039
#endif

drivers/infiniband/hw/hfi1/sdma_txreq.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
struct sdma_desc {
2020
/* private: don't use directly */
2121
u64 qw[2];
22+
void *pinning_ctx;
2223
};
2324

2425
/**

drivers/infiniband/hw/hfi1/trace_mmu.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,10 +37,6 @@ DEFINE_EVENT(hfi1_mmu_rb_template, hfi1_mmu_rb_search,
3737
TP_PROTO(unsigned long addr, unsigned long len),
3838
TP_ARGS(addr, len));
3939

40-
DEFINE_EVENT(hfi1_mmu_rb_template, hfi1_mmu_rb_remove,
41-
TP_PROTO(unsigned long addr, unsigned long len),
42-
TP_ARGS(addr, len));
43-
4440
DEFINE_EVENT(hfi1_mmu_rb_template, hfi1_mmu_mem_invalidate,
4541
TP_PROTO(unsigned long addr, unsigned long len),
4642
TP_ARGS(addr, len));

0 commit comments

Comments
 (0)