8484
/*
 * lookup_32bit - fold four 32-bit table entries, selected by bytes of src,
 * into dst.
 *
 *   src            register-name prefix; its bh and bl byte views are used as
 *                  table indices, then the register is shifted right by 16 and
 *                  the bh/bl views are used again (src is therefore modified)
 *   dst            register-name prefix; its 'd' (32-bit) view receives the
 *                  result
 *   op1, op2, op3  instructions that combine the s2, s3 and s4 entries with
 *                  the value already in dst (the s1 entry is loaded with movl)
 *   interleave_op  macro expanded as interleave_op(il_reg) just before the
 *                  final s4 lookup
 *   il_reg         argument handed to interleave_op
 *
 * Clobbers RID1 and RID2.
 *
 * The tables s1..s4 are addressed RIP-relatively. A RIP-relative operand
 * cannot also carry an index register, so each table base is first loaded
 * with leaq and the byte index is applied through a (base,index,4) operand.
 * RID1 and RID2 swap roles (index <-> base) from one lookup to the next, so
 * a register is only overwritten after its previous value has been consumed.
 */
#define lookup_32bit(src, dst, op1, op2, op3, interleave_op, il_reg) \
	movzbl		src ## bh,       RID1d;    \
	leaq		s1(%rip),        RID2;     \
	movl		(RID2,RID1,4),   dst ## d; \
	movzbl		src ## bl,       RID2d;    \
	leaq		s2(%rip),        RID1;     \
	op1		(RID1,RID2,4),   dst ## d; \
	/* expose the next two index bytes through the bh/bl views */ \
	shrq $16,	src;                       \
	movzbl		src ## bh,       RID1d;    \
	leaq		s3(%rip),        RID2;     \
	op2		(RID2,RID1,4),   dst ## d; \
	movzbl		src ## bl,       RID2d;    \
	interleave_op(il_reg);                     \
	leaq		s4(%rip),        RID1;     \
	op3		(RID1,RID2,4),   dst ## d;
96100
/*
 * dummy - macro that expands to nothing.
 * NOTE(review): presumably passed where a macro argument such as
 * interleave_op is required but no extra work is wanted - confirm at the
 * call sites.
 */
#define dummy(d) /* do nothing */
98102
175179 qop(RD, RC, 1 );
176180
/*
 * shuffle - byte-shuffle RKR in place with vpshufb, taking the shuffle
 * control from memory at symbol 'mask'.
 *
 * The control is read through a RIP-relative operand, so 'mask' must be a
 * symbol name (not a register).
 */
#define shuffle(mask) \
	vpshufb		mask(%rip),      RKR, RKR;
179183
/*
 * preload_rkr - set up RKR from the 16 bytes at (kr + n*16) in CTX.
 *
 *   n        selects which 16-byte group at offset kr in CTX is used
 *   do_mask  macro expanded as do_mask(mask) once RKR has been loaded
 *   mask     argument handed to do_mask
 *
 * RKR is first filled with the broadcast 32-bit value at .L16_mask (read
 * RIP-relatively) and then XORed with the selected 16 bytes from CTX.
 */
#define preload_rkr(n, do_mask, mask) \
	vbroadcastss	.L16_mask(%rip), RKR;      \
	/* add 16-bit rotation to key rotations (mod 32) */ \
	vpxor		(kr+n*16)(CTX),  RKR, RKR; \
	do_mask(mask);
@@ -258,9 +262,9 @@ SYM_FUNC_START_LOCAL(__cast6_enc_blk8)
258262
259263 movq %rdi , CTX;
260264
261- vmovdqa .Lbswap_mask, RKM;
262- vmovd .Lfirst_mask, R1ST;
263- vmovd .L32_mask, R32;
265+ vmovdqa .Lbswap_mask( %rip ) , RKM;
266+ vmovd .Lfirst_mask( %rip ) , R1ST;
267+ vmovd .L32_mask( %rip ) , R32;
264268
265269 inpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
266270 inpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
@@ -284,7 +288,7 @@ SYM_FUNC_START_LOCAL(__cast6_enc_blk8)
284288 popq %rbx ;
285289 popq %r15 ;
286290
287- vmovdqa .Lbswap_mask, RKM;
291+ vmovdqa .Lbswap_mask( %rip ) , RKM;
288292
289293 outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
290294 outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
@@ -306,9 +310,9 @@ SYM_FUNC_START_LOCAL(__cast6_dec_blk8)
306310
307311 movq %rdi , CTX;
308312
309- vmovdqa .Lbswap_mask, RKM;
310- vmovd .Lfirst_mask, R1ST;
311- vmovd .L32_mask, R32;
313+ vmovdqa .Lbswap_mask( %rip ) , RKM;
314+ vmovd .Lfirst_mask( %rip ) , R1ST;
315+ vmovd .L32_mask( %rip ) , R32;
312316
313317 inpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
314318 inpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
@@ -332,7 +336,7 @@ SYM_FUNC_START_LOCAL(__cast6_dec_blk8)
332336 popq %rbx ;
333337 popq %r15 ;
334338
335- vmovdqa .Lbswap_mask, RKM;
339+ vmovdqa .Lbswap_mask( %rip ) , RKM;
336340 outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
337341 outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
338342
0 commit comments