99#include <sys/mman.h>
1010#include <sys/utsname.h>
1111#include <sys/wait.h>
12+ #include <sys/stat.h>
13+ #include <fcntl.h>
1214#include <inttypes.h>
1315
16+ #include <sys/uio.h>
17+ #include <linux/io_uring.h>
1418#include "../kselftest.h"
1519
1620#ifndef __x86_64__
3236#define FUNC_BITS 0x2
3337#define FUNC_MMAP 0x4
3438#define FUNC_SYSCALL 0x8
39+ #define FUNC_URING 0x10
3540
36- #define TEST_MASK 0xf
41+ #define TEST_MASK 0x1f
3742
3843#define LOW_ADDR (0x1UL << 30)
3944#define HIGH_ADDR (0x3UL << 48)
4247
4348#define PAGE_SIZE (4 << 10)
4449
/* Empty asm with a "memory" clobber: keeps the compiler from moving memory accesses across it. */
#define barrier() \
	__asm__ __volatile__("" : : : "memory")
53+
54+ #define URING_QUEUE_SZ 1
55+ #define URING_BLOCK_SZ 2048
56+
4557struct testcases {
4658 unsigned int later ;
4759 int expected ; /* 2: SIGSEGV Error; 1: other errors */
@@ -51,6 +63,33 @@ struct testcases {
5163 const char * msg ;
5264};
5365
/*
 * Bookkeeping for one read request. A pointer to this structure is stored
 * in the SQE's user_data and read back from the matching CQE.
 */
struct file_io {
	int file_fd;		/* descriptor of the file being read */
	off_t file_sz;		/* size of that file in bytes */
	struct iovec iovecs[];	/* one entry per URING_BLOCK_SZ block */
};
72+
/*
 * Userspace view of one mapped io_uring ring. The same layout is used for
 * the submission ring and the completion ring; every pointer refers to a
 * location inside the ring's mmap()ed area.
 */
struct io_uring_queue {
	unsigned int *head;		/* consumer index */
	unsigned int *tail;		/* producer index */
	unsigned int *ring_mask;	/* ANDed with an index to get a slot */
	unsigned int *ring_entries;	/* number of slots in the ring */
	unsigned int *flags;		/* ring flags word */
	unsigned int *array;		/* index array of the submission ring */
	/* Entry storage: CQEs for a completion ring, SQEs for a submission ring */
	union {
		struct io_uring_cqe *cqes;
		struct io_uring_sqe *sqes;
	} queue;
	size_t ring_sz;			/* length in bytes of the ring mapping */
};
86+
87+ struct io_ring {
88+ int ring_fd ;
89+ struct io_uring_queue sq_ring ;
90+ struct io_uring_queue cq_ring ;
91+ };
92+
5493int tests_cnt ;
5594jmp_buf segv_env ;
5695
@@ -294,6 +333,285 @@ static int handle_syscall(struct testcases *test)
294333 return ret ;
295334}
296335
/*
 * Raw io_uring_setup system call.
 * Returns the syscall() result narrowed to int.
 */
int sys_uring_setup(unsigned int entries, struct io_uring_params *p)
{
	long rc = syscall(__NR_io_uring_setup, entries, p);

	return (int)rc;
}
340+
/*
 * Raw io_uring_enter system call; the two trailing arguments are always
 * passed as NULL and 0.
 * Returns the syscall() result narrowed to int.
 */
int sys_uring_enter(int fd, unsigned int to, unsigned int min, unsigned int flags)
{
	long rc = syscall(__NR_io_uring_enter, fd, to, min, flags, NULL, 0);

	return (int)rc;
}
345+
346+ /* Init submission queue and completion queue */
347+ int mmap_io_uring (struct io_uring_params p , struct io_ring * s )
348+ {
349+ struct io_uring_queue * sring = & s -> sq_ring ;
350+ struct io_uring_queue * cring = & s -> cq_ring ;
351+
352+ sring -> ring_sz = p .sq_off .array + p .sq_entries * sizeof (unsigned int );
353+ cring -> ring_sz = p .cq_off .cqes + p .cq_entries * sizeof (struct io_uring_cqe );
354+
355+ if (p .features & IORING_FEAT_SINGLE_MMAP ) {
356+ if (cring -> ring_sz > sring -> ring_sz )
357+ sring -> ring_sz = cring -> ring_sz ;
358+
359+ cring -> ring_sz = sring -> ring_sz ;
360+ }
361+
362+ void * sq_ptr = mmap (0 , sring -> ring_sz , PROT_READ | PROT_WRITE ,
363+ MAP_SHARED | MAP_POPULATE , s -> ring_fd ,
364+ IORING_OFF_SQ_RING );
365+
366+ if (sq_ptr == MAP_FAILED ) {
367+ perror ("sub-queue!" );
368+ return 1 ;
369+ }
370+
371+ void * cq_ptr = sq_ptr ;
372+
373+ if (!(p .features & IORING_FEAT_SINGLE_MMAP )) {
374+ cq_ptr = mmap (0 , cring -> ring_sz , PROT_READ | PROT_WRITE ,
375+ MAP_SHARED | MAP_POPULATE , s -> ring_fd ,
376+ IORING_OFF_CQ_RING );
377+ if (cq_ptr == MAP_FAILED ) {
378+ perror ("cpl-queue!" );
379+ munmap (sq_ptr , sring -> ring_sz );
380+ return 1 ;
381+ }
382+ }
383+
384+ sring -> head = sq_ptr + p .sq_off .head ;
385+ sring -> tail = sq_ptr + p .sq_off .tail ;
386+ sring -> ring_mask = sq_ptr + p .sq_off .ring_mask ;
387+ sring -> ring_entries = sq_ptr + p .sq_off .ring_entries ;
388+ sring -> flags = sq_ptr + p .sq_off .flags ;
389+ sring -> array = sq_ptr + p .sq_off .array ;
390+
391+ /* Map a queue as mem map */
392+ s -> sq_ring .queue .sqes = mmap (0 , p .sq_entries * sizeof (struct io_uring_sqe ),
393+ PROT_READ | PROT_WRITE , MAP_SHARED | MAP_POPULATE ,
394+ s -> ring_fd , IORING_OFF_SQES );
395+ if (s -> sq_ring .queue .sqes == MAP_FAILED ) {
396+ munmap (sq_ptr , sring -> ring_sz );
397+ if (sq_ptr != cq_ptr ) {
398+ ksft_print_msg ("failed to mmap uring queue!" );
399+ munmap (cq_ptr , cring -> ring_sz );
400+ return 1 ;
401+ }
402+ }
403+
404+ cring -> head = cq_ptr + p .cq_off .head ;
405+ cring -> tail = cq_ptr + p .cq_off .tail ;
406+ cring -> ring_mask = cq_ptr + p .cq_off .ring_mask ;
407+ cring -> ring_entries = cq_ptr + p .cq_off .ring_entries ;
408+ cring -> queue .cqes = cq_ptr + p .cq_off .cqes ;
409+
410+ return 0 ;
411+ }
412+
413+ /* Init io_uring queues */
414+ int setup_io_uring (struct io_ring * s )
415+ {
416+ struct io_uring_params para ;
417+
418+ memset (& para , 0 , sizeof (para ));
419+ s -> ring_fd = sys_uring_setup (URING_QUEUE_SZ , & para );
420+ if (s -> ring_fd < 0 )
421+ return 1 ;
422+
423+ return mmap_io_uring (para , s );
424+ }
425+
426+ /*
427+ * Get data from completion queue. the data buffer saved the file data
428+ * return 0: success; others: error;
429+ */
430+ int handle_uring_cq (struct io_ring * s )
431+ {
432+ struct file_io * fi = NULL ;
433+ struct io_uring_queue * cring = & s -> cq_ring ;
434+ struct io_uring_cqe * cqe ;
435+ unsigned int head ;
436+ off_t len = 0 ;
437+
438+ head = * cring -> head ;
439+
440+ do {
441+ barrier ();
442+ if (head == * cring -> tail )
443+ break ;
444+ /* Get the entry */
445+ cqe = & cring -> queue .cqes [head & * s -> cq_ring .ring_mask ];
446+ fi = (struct file_io * )cqe -> user_data ;
447+ if (cqe -> res < 0 )
448+ break ;
449+
450+ int blocks = (int )(fi -> file_sz + URING_BLOCK_SZ - 1 ) / URING_BLOCK_SZ ;
451+
452+ for (int i = 0 ; i < blocks ; i ++ )
453+ len += fi -> iovecs [i ].iov_len ;
454+
455+ head ++ ;
456+ } while (1 );
457+
458+ * cring -> head = head ;
459+ barrier ();
460+
461+ return (len != fi -> file_sz );
462+ }
463+
464+ /*
465+ * Submit squeue. specify via IORING_OP_READV.
466+ * the buffer need to be set metadata according to LAM mode
467+ */
468+ int handle_uring_sq (struct io_ring * ring , struct file_io * fi , unsigned long lam )
469+ {
470+ int file_fd = fi -> file_fd ;
471+ struct io_uring_queue * sring = & ring -> sq_ring ;
472+ unsigned int index = 0 , cur_block = 0 , tail = 0 , next_tail = 0 ;
473+ struct io_uring_sqe * sqe ;
474+
475+ off_t remain = fi -> file_sz ;
476+ int blocks = (int )(remain + URING_BLOCK_SZ - 1 ) / URING_BLOCK_SZ ;
477+
478+ while (remain ) {
479+ off_t bytes = remain ;
480+ void * buf ;
481+
482+ if (bytes > URING_BLOCK_SZ )
483+ bytes = URING_BLOCK_SZ ;
484+
485+ fi -> iovecs [cur_block ].iov_len = bytes ;
486+
487+ if (posix_memalign (& buf , URING_BLOCK_SZ , URING_BLOCK_SZ ))
488+ return 1 ;
489+
490+ fi -> iovecs [cur_block ].iov_base = (void * )set_metadata ((uint64_t )buf , lam );
491+ remain -= bytes ;
492+ cur_block ++ ;
493+ }
494+
495+ next_tail = * sring -> tail ;
496+ tail = next_tail ;
497+ next_tail ++ ;
498+
499+ barrier ();
500+
501+ index = tail & * ring -> sq_ring .ring_mask ;
502+
503+ sqe = & ring -> sq_ring .queue .sqes [index ];
504+ sqe -> fd = file_fd ;
505+ sqe -> flags = 0 ;
506+ sqe -> opcode = IORING_OP_READV ;
507+ sqe -> addr = (unsigned long )fi -> iovecs ;
508+ sqe -> len = blocks ;
509+ sqe -> off = 0 ;
510+ sqe -> user_data = (uint64_t )fi ;
511+
512+ sring -> array [index ] = index ;
513+ tail = next_tail ;
514+
515+ if (* sring -> tail != tail ) {
516+ * sring -> tail = tail ;
517+ barrier ();
518+ }
519+
520+ if (sys_uring_enter (ring -> ring_fd , 1 , 1 , IORING_ENTER_GETEVENTS ) < 0 )
521+ return 1 ;
522+
523+ return 0 ;
524+ }
525+
526+ /*
527+ * Test LAM in async I/O and io_uring, read current binery through io_uring
528+ * Set metadata in pointers to iovecs buffer.
529+ */
530+ int do_uring (unsigned long lam )
531+ {
532+ struct io_ring * ring ;
533+ struct file_io * fi ;
534+ struct stat st ;
535+ int ret = 1 ;
536+ char path [PATH_MAX ];
537+
538+ /* get current process path */
539+ if (readlink ("/proc/self/exe" , path , PATH_MAX ) <= 0 )
540+ return 1 ;
541+
542+ int file_fd = open (path , O_RDONLY );
543+
544+ if (file_fd < 0 )
545+ return 1 ;
546+
547+ if (fstat (file_fd , & st ) < 0 )
548+ return 1 ;
549+
550+ off_t file_sz = st .st_size ;
551+
552+ int blocks = (int )(file_sz + URING_BLOCK_SZ - 1 ) / URING_BLOCK_SZ ;
553+
554+ fi = malloc (sizeof (* fi ) + sizeof (struct iovec ) * blocks );
555+ if (!fi )
556+ return 1 ;
557+
558+ fi -> file_sz = file_sz ;
559+ fi -> file_fd = file_fd ;
560+
561+ ring = malloc (sizeof (* ring ));
562+ if (!ring )
563+ return 1 ;
564+
565+ memset (ring , 0 , sizeof (struct io_ring ));
566+
567+ if (setup_io_uring (ring ))
568+ goto out ;
569+
570+ if (handle_uring_sq (ring , fi , lam ))
571+ goto out ;
572+
573+ ret = handle_uring_cq (ring );
574+
575+ out :
576+ free (ring );
577+
578+ for (int i = 0 ; i < blocks ; i ++ ) {
579+ if (fi -> iovecs [i ].iov_base ) {
580+ uint64_t addr = ((uint64_t )fi -> iovecs [i ].iov_base );
581+
582+ switch (lam ) {
583+ case LAM_U57_BITS : /* Clear bits 62:57 */
584+ addr = (addr & ~(0x3fULL << 57 ));
585+ break ;
586+ }
587+ free ((void * )addr );
588+ fi -> iovecs [i ].iov_base = NULL ;
589+ }
590+ }
591+
592+ free (fi );
593+
594+ return ret ;
595+ }
596+
597+ int handle_uring (struct testcases * test )
598+ {
599+ int ret = 0 ;
600+
601+ if (test -> later == 0 && test -> lam != 0 )
602+ if (set_lam (test -> lam ) != 0 )
603+ return 1 ;
604+
605+ if (sigsetjmp (segv_env , 1 ) == 0 ) {
606+ signal (SIGSEGV , segv_handler );
607+ ret = do_uring (test -> lam );
608+ } else {
609+ ret = 2 ;
610+ }
611+
612+ return ret ;
613+ }
614+
297615static int fork_test (struct testcases * test )
298616{
299617 int ret , child_ret ;
@@ -340,6 +658,22 @@ static void run_test(struct testcases *test, int count)
340658 }
341659}
342660
661+ static struct testcases uring_cases [] = {
662+ {
663+ .later = 0 ,
664+ .lam = LAM_U57_BITS ,
665+ .test_func = handle_uring ,
666+ .msg = "URING: LAM_U57. Dereferencing pointer with metadata\n" ,
667+ },
668+ {
669+ .later = 1 ,
670+ .expected = 1 ,
671+ .lam = LAM_U57_BITS ,
672+ .test_func = handle_uring ,
673+ .msg = "URING:[Negative] Disable LAM. Dereferencing pointer with metadata.\n" ,
674+ },
675+ };
676+
343677static struct testcases malloc_cases [] = {
344678 {
345679 .later = 0 ,
@@ -410,7 +744,7 @@ static void cmd_help(void)
410744{
411745 printf ("usage: lam [-h] [-t test list]\n" );
412746 printf ("\t-t test list: run tests specified in the test list, default:0x%x\n" , TEST_MASK );
413- printf ("\t\t0x1:malloc; 0x2:max_bits; 0x4:mmap; 0x8:syscall.\n" );
747+ printf ("\t\t0x1:malloc; 0x2:max_bits; 0x4:mmap; 0x8:syscall; 0x10:io_uring .\n" );
414748 printf ("\t-h: help\n" );
415749}
416750
@@ -456,6 +790,9 @@ int main(int argc, char **argv)
456790 if (tests & FUNC_SYSCALL )
457791 run_test (syscall_cases , ARRAY_SIZE (syscall_cases ));
458792
793+ if (tests & FUNC_URING )
794+ run_test (uring_cases , ARRAY_SIZE (uring_cases ));
795+
459796 ksft_set_plan (tests_cnt );
460797
461798 return ksft_exit_pass ();
0 commit comments