@@ -264,6 +264,12 @@ struct io_rsrc_data {
 	bool				quiesce;
 };
 
+struct io_buffer_list {
+	struct list_head list;
+	struct list_head buf_list;
+	__u16 bgid;
+};
+
 struct io_buffer {
 	struct list_head list;
 	__u64 addr;
@@ -334,6 +340,8 @@ struct io_ev_fd {
 	struct rcu_head		rcu;
 };
 
+#define IO_BUFFERS_HASH_BITS	5
+
 struct io_ring_ctx {
 	/* const or read-mostly hot data */
 	struct {
@@ -386,7 +394,7 @@ struct io_ring_ctx {
 		struct list_head	timeout_list;
 		struct list_head	ltimeout_list;
 		struct list_head	cq_overflow_list;
-		struct xarray		io_buffers;
+		struct list_head	*io_buffers;
 		struct list_head	io_buffers_cache;
 		struct list_head	apoll_cache;
 		struct xarray		personalities;
@@ -1361,32 +1369,34 @@ static inline unsigned int io_put_kbuf(struct io_kiocb *req,
 	return cflags;
 }
 
+static struct io_buffer_list *io_buffer_get_list(struct io_ring_ctx *ctx,
+						 unsigned int bgid)
+{
+	struct list_head *hash_list;
+	struct io_buffer_list *bl;
+
+	hash_list = &ctx->io_buffers[hash_32(bgid, IO_BUFFERS_HASH_BITS)];
+	list_for_each_entry(bl, hash_list, list)
+		if (bl->bgid == bgid || bgid == -1U)
+			return bl;
+
+	return NULL;
+}
+
 static void io_kbuf_recycle(struct io_kiocb *req)
 {
 	struct io_ring_ctx *ctx = req->ctx;
-	struct io_buffer *head, *buf;
+	struct io_buffer_list *bl;
+	struct io_buffer *buf;
 
 	if (likely(!(req->flags & REQ_F_BUFFER_SELECTED)))
 		return;
 
 	lockdep_assert_held(&ctx->uring_lock);
 
 	buf = req->kbuf;
-
-	head = xa_load(&ctx->io_buffers, buf->bgid);
-	if (head) {
-		list_add(&buf->list, &head->list);
-	} else {
-		int ret;
-
-		INIT_LIST_HEAD(&buf->list);
-
-		/* if we fail, just leave buffer attached */
-		ret = xa_insert(&ctx->io_buffers, buf->bgid, buf, GFP_KERNEL);
-		if (unlikely(ret < 0))
-			return;
-	}
-
+	bl = io_buffer_get_list(ctx, buf->bgid);
+	list_add(&buf->list, &bl->buf_list);
 	req->flags &= ~REQ_F_BUFFER_SELECTED;
 	req->kbuf = NULL;
 }
@@ -1501,7 +1511,7 @@ static __cold void io_fallback_req_func(struct work_struct *work)
 static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 {
 	struct io_ring_ctx *ctx;
-	int hash_bits;
+	int i, hash_bits;
 
 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
 	if (!ctx)
@@ -1528,6 +1538,13 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	/* set invalid range, so io_import_fixed() fails meeting it */
 	ctx->dummy_ubuf->ubuf = -1UL;
 
+	ctx->io_buffers = kcalloc(1U << IO_BUFFERS_HASH_BITS,
+				  sizeof(struct list_head), GFP_KERNEL);
+	if (!ctx->io_buffers)
+		goto err;
+	for (i = 0; i < (1U << IO_BUFFERS_HASH_BITS); i++)
+		INIT_LIST_HEAD(&ctx->io_buffers[i]);
+
 	if (percpu_ref_init(&ctx->refs, io_ring_ctx_ref_free,
 			    PERCPU_REF_ALLOW_REINIT, GFP_KERNEL))
 		goto err;
@@ -1539,7 +1556,6 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	INIT_LIST_HEAD(&ctx->io_buffers_cache);
 	INIT_LIST_HEAD(&ctx->apoll_cache);
 	init_completion(&ctx->ref_comp);
-	xa_init_flags(&ctx->io_buffers, XA_FLAGS_ALLOC1);
 	xa_init_flags(&ctx->personalities, XA_FLAGS_ALLOC1);
 	mutex_init(&ctx->uring_lock);
 	init_waitqueue_head(&ctx->cq_wait);
@@ -1568,6 +1584,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 err:
 	kfree(ctx->dummy_ubuf);
 	kfree(ctx->cancel_hash);
+	kfree(ctx->io_buffers);
 	kfree(ctx);
 	return NULL;
 }
@@ -3351,30 +3368,36 @@ static void io_ring_submit_lock(struct io_ring_ctx *ctx, bool needs_lock)
 		mutex_lock(&ctx->uring_lock);
 }
 
+static void io_buffer_add_list(struct io_ring_ctx *ctx,
+			       struct io_buffer_list *bl, unsigned int bgid)
+{
+	struct list_head *list;
+
+	list = &ctx->io_buffers[hash_32(bgid, IO_BUFFERS_HASH_BITS)];
+	INIT_LIST_HEAD(&bl->buf_list);
+	bl->bgid = bgid;
+	list_add(&bl->list, list);
+}
+
 static struct io_buffer *io_buffer_select(struct io_kiocb *req, size_t *len,
 					  int bgid, unsigned int issue_flags)
 {
 	struct io_buffer *kbuf = req->kbuf;
-	struct io_buffer *head;
 	bool needs_lock = issue_flags & IO_URING_F_UNLOCKED;
+	struct io_ring_ctx *ctx = req->ctx;
+	struct io_buffer_list *bl;
 
 	if (req->flags & REQ_F_BUFFER_SELECTED)
 		return kbuf;
 
-	io_ring_submit_lock(req->ctx, needs_lock);
+	io_ring_submit_lock(ctx, needs_lock);
 
-	lockdep_assert_held(&req->ctx->uring_lock);
+	lockdep_assert_held(&ctx->uring_lock);
 
-	head = xa_load(&req->ctx->io_buffers, bgid);
-	if (head) {
-		if (!list_empty(&head->list)) {
-			kbuf = list_last_entry(&head->list, struct io_buffer,
-							list);
-			list_del(&kbuf->list);
-		} else {
-			kbuf = head;
-			xa_erase(&req->ctx->io_buffers, bgid);
-		}
+	bl = io_buffer_get_list(ctx, bgid);
+	if (bl && !list_empty(&bl->buf_list)) {
+		kbuf = list_first_entry(&bl->buf_list, struct io_buffer, list);
+		list_del(&kbuf->list);
 		if (*len > kbuf->len)
 			*len = kbuf->len;
 		req->flags |= REQ_F_BUFFER_SELECTED;
@@ -4669,8 +4692,8 @@ static int io_remove_buffers_prep(struct io_kiocb *req,
 	return 0;
 }
 
-static int __io_remove_buffers(struct io_ring_ctx *ctx, struct io_buffer *buf,
-			       int bgid, unsigned nbufs)
+static int __io_remove_buffers(struct io_ring_ctx *ctx,
+			       struct io_buffer_list *bl, unsigned nbufs)
 {
 	unsigned i = 0;
 
@@ -4679,17 +4702,16 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx, struct io_buffer *buf,
 		return 0;
 
 	/* the head kbuf is the list itself */
-	while (!list_empty(&buf->list)) {
+	while (!list_empty(&bl->buf_list)) {
 		struct io_buffer *nxt;
 
-		nxt = list_first_entry(&buf->list, struct io_buffer, list);
+		nxt = list_first_entry(&bl->buf_list, struct io_buffer, list);
 		list_del(&nxt->list);
 		if (++i == nbufs)
 			return i;
 		cond_resched();
 	}
 	i++;
-	xa_erase(&ctx->io_buffers, bgid);
 
 	return i;
 }
@@ -4698,7 +4720,7 @@ static int io_remove_buffers(struct io_kiocb *req, unsigned int issue_flags)
 {
 	struct io_provide_buf *p = &req->pbuf;
 	struct io_ring_ctx *ctx = req->ctx;
-	struct io_buffer *head;
+	struct io_buffer_list *bl;
 	int ret = 0;
 	bool needs_lock = issue_flags & IO_URING_F_UNLOCKED;
 
@@ -4707,9 +4729,9 @@ static int io_remove_buffers(struct io_kiocb *req, unsigned int issue_flags)
 	lockdep_assert_held(&ctx->uring_lock);
 
 	ret = -ENOENT;
-	head = xa_load(&ctx->io_buffers, p->bgid);
-	if (head)
-		ret = __io_remove_buffers(ctx, head, p->bgid, p->nbufs);
+	bl = io_buffer_get_list(ctx, p->bgid);
+	if (bl)
+		ret = __io_remove_buffers(ctx, bl, p->nbufs);
 	if (ret < 0)
 		req_set_fail(req);
 
@@ -4798,7 +4820,7 @@ static int io_refill_buffer_cache(struct io_ring_ctx *ctx)
 }
 
 static int io_add_buffers(struct io_ring_ctx *ctx, struct io_provide_buf *pbuf,
-			  struct io_buffer **head)
+			  struct io_buffer_list *bl)
 {
 	struct io_buffer *buf;
 	u64 addr = pbuf->addr;
@@ -4810,45 +4832,43 @@ static int io_add_buffers(struct io_ring_ctx *ctx, struct io_provide_buf *pbuf,
 			break;
 		buf = list_first_entry(&ctx->io_buffers_cache, struct io_buffer,
 					list);
-		list_del(&buf->list);
+		list_move_tail(&buf->list, &bl->buf_list);
 		buf->addr = addr;
 		buf->len = min_t(__u32, pbuf->len, MAX_RW_COUNT);
 		buf->bid = bid;
 		buf->bgid = pbuf->bgid;
 		addr += pbuf->len;
 		bid++;
-		if (!*head) {
-			INIT_LIST_HEAD(&buf->list);
-			*head = buf;
-		} else {
-			list_add_tail(&buf->list, &(*head)->list);
-		}
 		cond_resched();
 	}
 
-	return i ? i : -ENOMEM;
+	return i ? 0 : -ENOMEM;
 }
 
 static int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
 {
 	struct io_provide_buf *p = &req->pbuf;
 	struct io_ring_ctx *ctx = req->ctx;
-	struct io_buffer *head, *list;
+	struct io_buffer_list *bl;
 	int ret = 0;
 	bool needs_lock = issue_flags & IO_URING_F_UNLOCKED;
 
 	io_ring_submit_lock(ctx, needs_lock);
 
 	lockdep_assert_held(&ctx->uring_lock);
 
-	list = head = xa_load(&ctx->io_buffers, p->bgid);
-
-	ret = io_add_buffers(ctx, p, &head);
-	if (ret >= 0 && !list) {
-		ret = xa_insert(&ctx->io_buffers, p->bgid, head, GFP_KERNEL);
-		if (ret < 0)
-			__io_remove_buffers(ctx, head, p->bgid, -1U);
+	bl = io_buffer_get_list(ctx, p->bgid);
+	if (unlikely(!bl)) {
+		bl = kmalloc(sizeof(*bl), GFP_KERNEL);
+		if (!bl) {
+			ret = -ENOMEM;
+			goto err;
+		}
+		io_buffer_add_list(ctx, bl, p->bgid);
 	}
+
+	ret = io_add_buffers(ctx, p, bl);
+err:
 	if (ret < 0)
 		req_set_fail(req);
 	/* complete before unlock, IOPOLL may need the lock */
@@ -9936,11 +9956,20 @@ static int io_eventfd_unregister(struct io_ring_ctx *ctx)
 
 static void io_destroy_buffers(struct io_ring_ctx *ctx)
 {
-	struct io_buffer *buf;
-	unsigned long index;
+	int i;
+
+	for (i = 0; i < (1U << IO_BUFFERS_HASH_BITS); i++) {
+		struct list_head *list = &ctx->io_buffers[i];
 
-	xa_for_each(&ctx->io_buffers, index, buf)
-		__io_remove_buffers(ctx, buf, index, -1U);
+		while (!list_empty(list)) {
+			struct io_buffer_list *bl;
+
+			bl = list_first_entry(list, struct io_buffer_list, list);
+			__io_remove_buffers(ctx, bl, -1U);
+			list_del(&bl->list);
+			kfree(bl);
+		}
+	}
 
 	while (!list_empty(&ctx->io_buffers_pages)) {
 		struct page *page;
@@ -10049,6 +10078,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
 	io_free_napi_list(ctx);
 	kfree(ctx->cancel_hash);
 	kfree(ctx->dummy_ubuf);
+	kfree(ctx->io_buffers);
 	kfree(ctx);
 }
 
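Taken together, the patch replaces the per-ring xarray of buffer groups with a fixed table of 2^IO_BUFFERS_HASH_BITS list heads, indexed by hash_32(bgid, IO_BUFFERS_HASH_BITS); each io_buffer_list is chained into its bucket and keeps that group's buffers on buf_list. As a rough stand-alone illustration of the bucket-and-chain lookup (not kernel code: the bgroup_* names, the array-of-pointers buckets, and the userspace hash below are assumptions made for this sketch), something like the following captures the pattern:

```c
/* Stand-alone sketch of a 32-bucket hash of buffer-group entries keyed by
 * bgid, loosely mirroring the io_buffers layout in the patch above.
 * All names here (bgroup, bgroup_find, ...) are hypothetical. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define HASH_BITS  5                    /* 2^5 = 32 buckets, like IO_BUFFERS_HASH_BITS */
#define NR_BUCKETS (1U << HASH_BITS)

struct bgroup {
	struct bgroup *next;            /* chains groups that hash to the same bucket */
	uint16_t bgid;                  /* buffer group id */
};

static struct bgroup *buckets[NR_BUCKETS];

/* hash_32()-style multiplicative hash: take the top bits of bgid * golden ratio */
static unsigned int bucket_of(unsigned int bgid)
{
	return (bgid * 0x61C88647u) >> (32 - HASH_BITS);
}

static struct bgroup *bgroup_find(unsigned int bgid)
{
	struct bgroup *bl;

	for (bl = buckets[bucket_of(bgid)]; bl; bl = bl->next)
		if (bl->bgid == bgid)
			return bl;
	return NULL;
}

static struct bgroup *bgroup_get_or_add(unsigned int bgid)
{
	struct bgroup *bl = bgroup_find(bgid);

	if (bl)
		return bl;
	bl = calloc(1, sizeof(*bl));
	if (!bl)
		return NULL;
	bl->bgid = (uint16_t)bgid;
	bl->next = buckets[bucket_of(bgid)];    /* insert at bucket head, like list_add() */
	buckets[bucket_of(bgid)] = bl;
	return bl;
}

int main(void)
{
	/* two distinct groups; collisions, if any, are resolved by chaining */
	bgroup_get_or_add(7);
	bgroup_get_or_add(39);
	printf("bgid 7  -> bucket %u\n", bucket_of(7));
	printf("bgid 39 -> bucket %u\n", bucket_of(39));
	printf("found 39: %s\n", bgroup_find(39) ? "yes" : "no");
	return 0;
}
```

Groups whose bgid values land in the same bucket are simply chained and told apart by the bgid comparison, which is exactly what io_buffer_get_list does with list_for_each_entry in the patch.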