@@ -113,6 +113,7 @@ static int bpf_size_to_x86_bytes(int bpf_size)
113113/* Pick a register outside of BPF range for JIT internal work */
114114#define AUX_REG (MAX_BPF_JIT_REG + 1) /* reg2hex maps this slot to R11 (temp register) */
115115#define X86_REG_R9 (MAX_BPF_JIT_REG + 2) /* reg2hex maps this slot to R9 (6th function argument) */
116+ #define X86_REG_R12 (MAX_BPF_JIT_REG + 3) /* reg2hex maps this slot to R12; do_jit() loads arena_vm_start into it */
116117
117118/*
118119 * The following table maps BPF registers to x86-64 registers.
@@ -139,6 +140,7 @@ static const int reg2hex[] = {
139140 [BPF_REG_AX ] = 2 , /* R10 temp register */
140141 [AUX_REG ] = 3 , /* R11 temp register */
141142 [X86_REG_R9 ] = 1 , /* R9 register, 6th function argument */
143+ [X86_REG_R12 ] = 4 , /* R12 callee saved */
142144};
143145
144146static const int reg2pt_regs [] = {
@@ -167,6 +169,7 @@ static bool is_ereg(u32 reg)
167169 BIT (BPF_REG_8 ) |
168170 BIT (BPF_REG_9 ) |
169171 BIT (X86_REG_R9 ) |
172+ BIT (X86_REG_R12 ) |
170173 BIT (BPF_REG_AX ));
171174}
172175
@@ -205,6 +208,17 @@ static u8 add_2mod(u8 byte, u32 r1, u32 r2)
205208 return byte ;
206209}
207210
211+ static u8 add_3mod (u8 byte , u32 r1 , u32 r2 , u32 index )
212+ {
213+ if (is_ereg (r1 ))
214+ byte |= 1 ;
215+ if (is_ereg (index ))
216+ byte |= 2 ;
217+ if (is_ereg (r2 ))
218+ byte |= 4 ;
219+ return byte ;
220+ }
221+
208222/* Encode 'dst_reg' register into x86-64 opcode 'byte' */
209223static u8 add_1reg (u8 byte , u32 dst_reg )
210224{
@@ -645,6 +659,8 @@ static void emit_bpf_tail_call_indirect(struct bpf_prog *bpf_prog,
645659 pop_r12 (& prog );
646660 } else {
647661 pop_callee_regs (& prog , callee_regs_used );
662+ if (bpf_arena_get_kern_vm_start (bpf_prog -> aux -> arena ))
663+ pop_r12 (& prog );
648664 }
649665
650666 EMIT1 (0x58 ); /* pop rax */
@@ -704,6 +720,8 @@ static void emit_bpf_tail_call_direct(struct bpf_prog *bpf_prog,
704720 pop_r12 (& prog );
705721 } else {
706722 pop_callee_regs (& prog , callee_regs_used );
723+ if (bpf_arena_get_kern_vm_start (bpf_prog -> aux -> arena ))
724+ pop_r12 (& prog );
707725 }
708726
709727 EMIT1 (0x58 ); /* pop rax */
@@ -887,6 +905,18 @@ static void emit_insn_suffix(u8 **pprog, u32 ptr_reg, u32 val_reg, int off)
887905 * pprog = prog ;
888906}
889907
908+ static void emit_insn_suffix_SIB (u8 * * pprog , u32 ptr_reg , u32 val_reg , u32 index_reg , int off )
909+ {
910+ u8 * prog = * pprog ;
911+
912+ if (is_imm8 (off )) {
913+ EMIT3 (add_2reg (0x44 , BPF_REG_0 , val_reg ), add_2reg (0 , ptr_reg , index_reg ) /* SIB */ , off );
914+ } else {
915+ EMIT2_off32 (add_2reg (0x84 , BPF_REG_0 , val_reg ), add_2reg (0 , ptr_reg , index_reg ) /* SIB */ , off );
916+ }
917+ * pprog = prog ;
918+ }
919+
890920/*
891921 * Emit a REX byte if it will be necessary to address these registers
892922 */
@@ -968,6 +998,37 @@ static void emit_ldsx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
968998 * pprog = prog ;
969999}
9701000
1001+ static void emit_ldx_index (u8 * * pprog , u32 size , u32 dst_reg , u32 src_reg , u32 index_reg , int off )
1002+ {
1003+ u8 * prog = * pprog ;
1004+
1005+ switch (size ) {
1006+ case BPF_B :
1007+ /* movzx rax, byte ptr [rax + r12 + off] */
1008+ EMIT3 (add_3mod (0x40 , src_reg , dst_reg , index_reg ), 0x0F , 0xB6 );
1009+ break ;
1010+ case BPF_H :
1011+ /* movzx rax, word ptr [rax + r12 + off] */
1012+ EMIT3 (add_3mod (0x40 , src_reg , dst_reg , index_reg ), 0x0F , 0xB7 );
1013+ break ;
1014+ case BPF_W :
1015+ /* mov eax, dword ptr [rax + r12 + off] */
1016+ EMIT2 (add_3mod (0x40 , src_reg , dst_reg , index_reg ), 0x8B );
1017+ break ;
1018+ case BPF_DW :
1019+ /* mov rax, qword ptr [rax + r12 + off] */
1020+ EMIT2 (add_3mod (0x48 , src_reg , dst_reg , index_reg ), 0x8B );
1021+ break ;
1022+ }
1023+ emit_insn_suffix_SIB (& prog , src_reg , dst_reg , index_reg , off );
1024+ * pprog = prog ;
1025+ }
1026+
1027+ static void emit_ldx_r12 (u8 * * pprog , u32 size , u32 dst_reg , u32 src_reg , int off )
1028+ {
1029+ emit_ldx_index (pprog , size , dst_reg , src_reg , X86_REG_R12 , off );
1030+ }
1031+
9711032/* STX: *(u8*)(dst_reg + off) = src_reg */
9721033static void emit_stx (u8 * * pprog , u32 size , u32 dst_reg , u32 src_reg , int off )
9731034{
@@ -1002,6 +1063,71 @@ static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
10021063 * pprog = prog ;
10031064}
10041065
1066+ /* STX: *(u8*)(dst_reg + index_reg + off) = src_reg */
1067+ static void emit_stx_index (u8 * * pprog , u32 size , u32 dst_reg , u32 src_reg , u32 index_reg , int off )
1068+ {
1069+ u8 * prog = * pprog ;
1070+
1071+ switch (size ) {
1072+ case BPF_B :
1073+ /* mov byte ptr [rax + r12 + off], al */
1074+ EMIT2 (add_3mod (0x40 , dst_reg , src_reg , index_reg ), 0x88 );
1075+ break ;
1076+ case BPF_H :
1077+ /* mov word ptr [rax + r12 + off], ax */
1078+ EMIT3 (0x66 , add_3mod (0x40 , dst_reg , src_reg , index_reg ), 0x89 );
1079+ break ;
1080+ case BPF_W :
1081+ /* mov dword ptr [rax + r12 + 1], eax */
1082+ EMIT2 (add_3mod (0x40 , dst_reg , src_reg , index_reg ), 0x89 );
1083+ break ;
1084+ case BPF_DW :
1085+ /* mov qword ptr [rax + r12 + 1], rax */
1086+ EMIT2 (add_3mod (0x48 , dst_reg , src_reg , index_reg ), 0x89 );
1087+ break ;
1088+ }
1089+ emit_insn_suffix_SIB (& prog , dst_reg , src_reg , index_reg , off );
1090+ * pprog = prog ;
1091+ }
1092+
1093+ static void emit_stx_r12 (u8 * * pprog , u32 size , u32 dst_reg , u32 src_reg , int off )
1094+ {
1095+ emit_stx_index (pprog , size , dst_reg , src_reg , X86_REG_R12 , off );
1096+ }
1097+
1098+ /* ST: *(u8*)(dst_reg + index_reg + off) = imm32 */
1099+ static void emit_st_index (u8 * * pprog , u32 size , u32 dst_reg , u32 index_reg , int off , int imm )
1100+ {
1101+ u8 * prog = * pprog ;
1102+
1103+ switch (size ) {
1104+ case BPF_B :
1105+ /* mov byte ptr [rax + r12 + off], imm8 */
1106+ EMIT2 (add_3mod (0x40 , dst_reg , 0 , index_reg ), 0xC6 );
1107+ break ;
1108+ case BPF_H :
1109+ /* mov word ptr [rax + r12 + off], imm16 */
1110+ EMIT3 (0x66 , add_3mod (0x40 , dst_reg , 0 , index_reg ), 0xC7 );
1111+ break ;
1112+ case BPF_W :
1113+ /* mov dword ptr [rax + r12 + 1], imm32 */
1114+ EMIT2 (add_3mod (0x40 , dst_reg , 0 , index_reg ), 0xC7 );
1115+ break ;
1116+ case BPF_DW :
1117+ /* mov qword ptr [rax + r12 + 1], imm32 */
1118+ EMIT2 (add_3mod (0x48 , dst_reg , 0 , index_reg ), 0xC7 );
1119+ break ;
1120+ }
1121+ emit_insn_suffix_SIB (& prog , dst_reg , 0 , index_reg , off );
1122+ EMIT (imm , bpf_size_to_x86_bytes (size ));
1123+ * pprog = prog ;
1124+ }
1125+
1126+ static void emit_st_r12 (u8 * * pprog , u32 size , u32 dst_reg , int off , int imm )
1127+ {
1128+ emit_st_index (pprog , size , dst_reg , X86_REG_R12 , off , imm );
1129+ }
1130+
10051131static int emit_atomic (u8 * * pprog , u8 atomic_op ,
10061132 u32 dst_reg , u32 src_reg , s16 off , u8 bpf_size )
10071133{
@@ -1043,12 +1169,15 @@ static int emit_atomic(u8 **pprog, u8 atomic_op,
10431169 return 0 ;
10441170}
10451171
1172+ #define DONT_CLEAR 1 /* fixup register-offset sentinel: ex_handler_bpf() zeroes no register when it sees this */
1173+
10461174bool ex_handler_bpf (const struct exception_table_entry * x , struct pt_regs * regs )
10471175{
10481176 u32 reg = x -> fixup >> 8 ;
10491177
10501178 /* jump over faulting load and clear dest register */
1051- * (unsigned long * )((void * )regs + reg ) = 0 ;
1179+ if (reg != DONT_CLEAR )
1180+ * (unsigned long * )((void * )regs + reg ) = 0 ;
10521181 regs -> ip += x -> fixup & 0xff ;
10531182 return true;
10541183}
@@ -1147,11 +1276,14 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
11471276 bool tail_call_seen = false;
11481277 bool seen_exit = false;
11491278 u8 temp [BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY ];
1279+ u64 arena_vm_start ;
11501280 int i , excnt = 0 ;
11511281 int ilen , proglen = 0 ;
11521282 u8 * prog = temp ;
11531283 int err ;
11541284
1285+ arena_vm_start = bpf_arena_get_kern_vm_start (bpf_prog -> aux -> arena );
1286+
11551287 detect_reg_usage (insn , insn_cnt , callee_regs_used ,
11561288 & tail_call_seen );
11571289
@@ -1172,8 +1304,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
11721304 push_r12 (& prog );
11731305 push_callee_regs (& prog , all_callee_regs_used );
11741306 } else {
1307+ if (arena_vm_start )
1308+ push_r12 (& prog );
11751309 push_callee_regs (& prog , callee_regs_used );
11761310 }
1311+ if (arena_vm_start )
1312+ emit_mov_imm64 (& prog , X86_REG_R12 ,
1313+ arena_vm_start >> 32 , (u32 ) arena_vm_start );
11771314
11781315 ilen = prog - temp ;
11791316 if (rw_image )
@@ -1564,6 +1701,56 @@ st: if (is_imm8(insn->off))
15641701 emit_stx (& prog , BPF_SIZE (insn -> code ), dst_reg , src_reg , insn -> off );
15651702 break ;
15661703
1704+ case BPF_ST | BPF_PROBE_MEM32 | BPF_B :
1705+ case BPF_ST | BPF_PROBE_MEM32 | BPF_H :
1706+ case BPF_ST | BPF_PROBE_MEM32 | BPF_W :
1707+ case BPF_ST | BPF_PROBE_MEM32 | BPF_DW :
1708+ start_of_ldx = prog ;
1709+ emit_st_r12 (& prog , BPF_SIZE (insn -> code ), dst_reg , insn -> off , insn -> imm );
1710+ goto populate_extable ;
1711+
1712+ /* LDX: dst_reg = *(u8*)(src_reg + r12 + off) */
1713+ case BPF_LDX | BPF_PROBE_MEM32 | BPF_B :
1714+ case BPF_LDX | BPF_PROBE_MEM32 | BPF_H :
1715+ case BPF_LDX | BPF_PROBE_MEM32 | BPF_W :
1716+ case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW :
1717+ case BPF_STX | BPF_PROBE_MEM32 | BPF_B :
1718+ case BPF_STX | BPF_PROBE_MEM32 | BPF_H :
1719+ case BPF_STX | BPF_PROBE_MEM32 | BPF_W :
1720+ case BPF_STX | BPF_PROBE_MEM32 | BPF_DW :
1721+ start_of_ldx = prog ;
1722+ if (BPF_CLASS (insn -> code ) == BPF_LDX )
1723+ emit_ldx_r12 (& prog , BPF_SIZE (insn -> code ), dst_reg , src_reg , insn -> off );
1724+ else
1725+ emit_stx_r12 (& prog , BPF_SIZE (insn -> code ), dst_reg , src_reg , insn -> off );
1726+ populate_extable :
1727+ {
1728+ struct exception_table_entry * ex ;
1729+ u8 * _insn = image + proglen + (start_of_ldx - temp );
1730+ s64 delta ;
1731+
1732+ if (!bpf_prog -> aux -> extable )
1733+ break ;
1734+
1735+ if (excnt >= bpf_prog -> aux -> num_exentries ) {
1736+ pr_err ("mem32 extable bug\n" );
1737+ return - EFAULT ;
1738+ }
1739+ ex = & bpf_prog -> aux -> extable [excnt ++ ];
1740+
1741+ delta = _insn - (u8 * )& ex -> insn ;
1742+ /* switch ex to rw buffer for writes */
1743+ ex = (void * )rw_image + ((void * )ex - (void * )image );
1744+
1745+ ex -> insn = delta ;
1746+
1747+ ex -> data = EX_TYPE_BPF ;
1748+
1749+ ex -> fixup = (prog - start_of_ldx ) |
1750+ ((BPF_CLASS (insn -> code ) == BPF_LDX ? reg2pt_regs [dst_reg ] : DONT_CLEAR ) << 8 );
1751+ }
1752+ break ;
1753+
15671754 /* LDX: dst_reg = *(u8*)(src_reg + off) */
15681755 case BPF_LDX | BPF_MEM | BPF_B :
15691756 case BPF_LDX | BPF_PROBE_MEM | BPF_B :
@@ -2036,6 +2223,8 @@ st: if (is_imm8(insn->off))
20362223 pop_r12 (& prog );
20372224 } else {
20382225 pop_callee_regs (& prog , callee_regs_used );
2226+ if (arena_vm_start )
2227+ pop_r12 (& prog );
20392228 }
20402229 EMIT1 (0xC9 ); /* leave */
20412230 emit_return (& prog , image + addrs [i - 1 ] + (prog - temp ));
0 commit comments