Skip to content

Commit 7c586a5

Browse files
clementleger authored and palmer-dabbelt committed
riscv: add floating point insn support to misaligned access emulation
This support is partially based on the OpenSBI misaligned emulation floating point instruction support. It provides support for the existing floating point instructions (both 32-bit and 64-bit, as well as the compressed ones). Since floating point registers are not part of the pt_regs struct, we need to modify them directly using some assembly. We also mark the pt_regs status as dirty when we modify them, to be sure a context switch will save the FP state. With this support, Linux is on par with the OpenSBI support. Signed-off-by: Clément Léger <cleger@rivosinc.com> Link: https://lore.kernel.org/r/20231004151405.521596-5-cleger@rivosinc.com Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
1 parent 89c12fe commit 7c586a5

2 files changed

Lines changed: 269 additions & 4 deletions

File tree

arch/riscv/kernel/fpu.S

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,3 +104,124 @@ ENTRY(__fstate_restore)
104104
csrc CSR_STATUS, t1
105105
ret
106106
ENDPROC(__fstate_restore)
107+
108+
/*
 * Per-register access snippets. Each macro expands to two instructions:
 * the FP register access itself, followed by a jump to local label 2.
 *
 *  get_f32/get_f64: move FP register `which` into a0. When __riscv_xlen is
 *                   not 64, get_f64 instead stores `which` to the address
 *                   held in a1.
 *  put_f32/put_f64: move a1 into FP register `which`. When __riscv_xlen is
 *                   not 64, put_f64 instead loads `which` from the address
 *                   held in a1.
 */
#define get_f32(which) fmv.x.s a0, which; j 2f
109+
#define put_f32(which) fmv.s.x which, a1; j 2f
110+
#if __riscv_xlen == 64
111+
# define get_f64(which) fmv.x.d a0, which; j 2f
112+
# define put_f64(which) fmv.d.x which, a1; j 2f
113+
#else
114+
# define get_f64(which) fsd which, 0(a1); j 2f
115+
# define put_f64(which) fld which, 0(a1); j 2f
116+
#endif
117+
118+
.macro fp_access_prologue
119+
/*
120+
* Compute the jump target of the snippet that accesses the requested FP
121+
* register (index in a0), since we don't have indirect FP register access.
* Uses t0 and t2 as scratch and leaves SR_FS in t1.
122+
*/
123+
sll t0, a0, 3 /* t0 = a0 * 8 */
124+
la t2, 1f /* t2 = address of local label 1 below */
125+
add t0, t0, t2 /* t0 = label 1 + a0 * 8 */
126+
li t1, SR_FS
127+
csrs CSR_STATUS, t1 /* set the SR_FS bits in CSR_STATUS */
128+
jr t0 /* jump to the computed address */
129+
1:
130+
.endm
131+
132+
/*
 * Defines local label 2, the jump target of every get_xxx/put_xxx snippet,
 * then clears in CSR_STATUS the bits held in t1 and returns to the caller.
 * NOTE(review): relies on t1 still holding the SR_FS value loaded by
 * fp_access_prologue.
 */
.macro fp_access_epilogue
133+
2:
134+
csrc CSR_STATUS, t1
135+
ret
136+
.endm
137+
138+
/*
 * Expand __access_func once for each FP register f0..f31, in index order.
 * Used between fp_access_prologue and fp_access_epilogue with one of the
 * get_xxx/put_xxx snippets as __access_func.
 */
#define fp_access_body(__access_func) \
139+
__access_func(f0); \
140+
__access_func(f1); \
141+
__access_func(f2); \
142+
__access_func(f3); \
143+
__access_func(f4); \
144+
__access_func(f5); \
145+
__access_func(f6); \
146+
__access_func(f7); \
147+
__access_func(f8); \
148+
__access_func(f9); \
149+
__access_func(f10); \
150+
__access_func(f11); \
151+
__access_func(f12); \
152+
__access_func(f13); \
153+
__access_func(f14); \
154+
__access_func(f15); \
155+
__access_func(f16); \
156+
__access_func(f17); \
157+
__access_func(f18); \
158+
__access_func(f19); \
159+
__access_func(f20); \
160+
__access_func(f21); \
161+
__access_func(f22); \
162+
__access_func(f23); \
163+
__access_func(f24); \
164+
__access_func(f25); \
165+
__access_func(f26); \
166+
__access_func(f27); \
167+
__access_func(f28); \
168+
__access_func(f29); \
169+
__access_func(f30); \
170+
__access_func(f31)
171+
172+
173+
#ifdef CONFIG_RISCV_MISALIGNED
174+
175+
/*
176+
* Disable the compressed instruction set to keep a constant offset between FP
177+
* load/store/move instructions
178+
*/
179+
.option norvc
180+
/*
181+
* put_f32_reg - Set an FP register from an integer register holding the value
182+
* a0 = index of the FP register to be set
183+
* a1 = value to be moved into the FP register
* Built from fp_access_prologue, one put_f32 snippet per FP register and
* fp_access_epilogue.
184+
*/
185+
SYM_FUNC_START(put_f32_reg)
186+
fp_access_prologue
187+
fp_access_body(put_f32)
188+
fp_access_epilogue
189+
SYM_FUNC_END(put_f32_reg)
190+
191+
/*
192+
* get_f32_reg - Get an FP register value and return it in a0
193+
* a0 = index of the FP register to be retrieved
* Built from fp_access_prologue, one get_f32 snippet per FP register and
* fp_access_epilogue.
194+
*/
195+
SYM_FUNC_START(get_f32_reg)
196+
fp_access_prologue
197+
fp_access_body(get_f32)
198+
fp_access_epilogue
199+
SYM_FUNC_END(get_f32_reg)
200+
201+
/*
202+
* put_f64_reg - Set a 64-bit FP register from a value or from a pointer.
203+
* a0 = index of the FP register to be set
204+
* a1 = value to be moved into the FP register when xlen == 64; otherwise a
205+
* pointer from which the 64-bit value is loaded.
206+
*/
207+
SYM_FUNC_START(put_f64_reg)
208+
fp_access_prologue
209+
fp_access_body(put_f64)
210+
fp_access_epilogue
211+
SYM_FUNC_END(put_f64_reg)
212+
213+
/*
214+
* get_f64_reg - Get a 64-bit FP register value and either return it or store
215+
* it through a pointer.
216+
* a0 = index of the FP register to be retrieved
217+
* a1 = when xlen == 64, unused: the FP register value is returned through
218+
* a0. Otherwise, pointer to the location where the FP register value
219+
* is stored.
220+
*/
221+
SYM_FUNC_START(get_f64_reg)
222+
fp_access_prologue
223+
fp_access_body(get_f64)
224+
fp_access_epilogue
225+
SYM_FUNC_END(get_f64_reg)
226+
227+
#endif /* CONFIG_RISCV_MISALIGNED */

arch/riscv/kernel/traps_misaligned.c

Lines changed: 148 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,115 @@
153153
#define PRECISION_S 0
154154
#define PRECISION_D 1
155155

156+
#ifdef CONFIG_FPU
157+
158+
/*
 * Extract the 5-bit destination register index held in bits 11:7 of @insn.
 * The argument and the shift are fully parenthesized so that an expression
 * argument (e.g. "a | b") is shifted as a whole.
 */
#define FP_GET_RD(insn) (((insn) >> 7) & 0x1F)
159+
160+
extern void put_f32_reg(unsigned long fp_reg, unsigned long value);
161+
162+
static int set_f32_rd(unsigned long insn, struct pt_regs *regs,
163+
unsigned long val)
164+
{
165+
unsigned long fp_reg = FP_GET_RD(insn);
166+
167+
put_f32_reg(fp_reg, val);
168+
regs->status |= SR_FS_DIRTY;
169+
170+
return 0;
171+
}
172+
173+
extern void put_f64_reg(unsigned long fp_reg, unsigned long value);
174+
175+
static int set_f64_rd(unsigned long insn, struct pt_regs *regs, u64 val)
176+
{
177+
unsigned long fp_reg = FP_GET_RD(insn);
178+
unsigned long value;
179+
180+
#if __riscv_xlen == 32
181+
value = (unsigned long) &val;
182+
#else
183+
value = val;
184+
#endif
185+
put_f64_reg(fp_reg, value);
186+
regs->status |= SR_FS_DIRTY;
187+
188+
return 0;
189+
}
190+
191+
#if __riscv_xlen == 32
192+
extern void get_f64_reg(unsigned long fp_reg, u64 *value);
193+
194+
static u64 get_f64_rs(unsigned long insn, u8 fp_reg_offset,
195+
struct pt_regs *regs)
196+
{
197+
unsigned long fp_reg = (insn >> fp_reg_offset) & 0x1F;
198+
u64 val;
199+
200+
get_f64_reg(fp_reg, &val);
201+
regs->status |= SR_FS_DIRTY;
202+
203+
return val;
204+
}
205+
#else
206+
207+
extern unsigned long get_f64_reg(unsigned long fp_reg);
208+
209+
static unsigned long get_f64_rs(unsigned long insn, u8 fp_reg_offset,
210+
struct pt_regs *regs)
211+
{
212+
unsigned long fp_reg = (insn >> fp_reg_offset) & 0x1F;
213+
unsigned long val;
214+
215+
val = get_f64_reg(fp_reg);
216+
regs->status |= SR_FS_DIRTY;
217+
218+
return val;
219+
}
220+
221+
#endif
222+
223+
extern unsigned long get_f32_reg(unsigned long fp_reg);
224+
225+
static unsigned long get_f32_rs(unsigned long insn, u8 fp_reg_offset,
226+
struct pt_regs *regs)
227+
{
228+
unsigned long fp_reg = (insn >> fp_reg_offset) & 0x1F;
229+
unsigned long val;
230+
231+
val = get_f32_reg(fp_reg);
232+
regs->status |= SR_FS_DIRTY;
233+
234+
return val;
235+
}
236+
237+
#else /* CONFIG_FPU */
238+
/*
 * !CONFIG_FPU stub of set_f32_rd().
 *
 * Returns int like the CONFIG_FPU implementation, so a caller that checks
 * the result builds in both configurations. No FP register is written, so
 * the stub reports -EOPNOTSUPP, the same error the misaligned handlers
 * return for FP instructions when CONFIG_FPU is disabled.
 */
static int set_f32_rd(unsigned long insn, struct pt_regs *regs,
		      unsigned long val)
{
	return -EOPNOTSUPP;
}
240+
241+
/*
 * !CONFIG_FPU stub of set_f64_rd().
 *
 * Returns int like the CONFIG_FPU implementation, so a caller that checks
 * the result builds in both configurations. No FP register is written, so
 * the stub reports -EOPNOTSUPP, the same error the misaligned handlers
 * return for FP instructions when CONFIG_FPU is disabled.
 */
static int set_f64_rd(unsigned long insn, struct pt_regs *regs, u64 val)
{
	return -EOPNOTSUPP;
}
242+
243+
/*
 * !CONFIG_FPU stub of get_f64_rs(): reads no FP register and always
 * yields 0.
 */
static unsigned long get_f64_rs(unsigned long insn, u8 fp_reg_offset,
				struct pt_regs *regs)
{
	return 0UL;
}
248+
249+
/*
 * !CONFIG_FPU stub of get_f32_rs(): reads no FP register and always
 * yields 0.
 */
static unsigned long get_f32_rs(unsigned long insn, u8 fp_reg_offset,
				struct pt_regs *regs)
{
	return 0UL;
}
254+
255+
#endif
256+
257+
/*
 * 64-bit FP source register readers, all built on get_f64_rs():
 *  GET_F64_RS2  - register index taken from bit 20 of insn
 *  GET_F64_RS2C - register index taken from bit 2 of insn
 *  GET_F64_RS2S - register index is the low 5 bits of RVC_RS2S(insn)
 */
#define GET_F64_RS2(insn, regs)		get_f64_rs((insn), 20, (regs))
#define GET_F64_RS2C(insn, regs)	get_f64_rs((insn), 2, (regs))
#define GET_F64_RS2S(insn, regs)	get_f64_rs(RVC_RS2S(insn), 0, (regs))
260+
261+
/*
 * 32-bit FP source register readers, all built on get_f32_rs():
 *  GET_F32_RS2  - register index taken from bit 20 of insn
 *  GET_F32_RS2C - register index taken from bit 2 of insn
 *  GET_F32_RS2S - register index is the low 5 bits of RVC_RS2S(insn)
 */
#define GET_F32_RS2(insn, regs)		get_f32_rs((insn), 20, (regs))
#define GET_F32_RS2C(insn, regs)	get_f32_rs((insn), 2, (regs))
#define GET_F32_RS2S(insn, regs)	get_f32_rs(RVC_RS2S(insn), 0, (regs))
264+
156265
#ifdef CONFIG_RISCV_M_MODE
157266
static inline int load_u8(struct pt_regs *regs, const u8 *addr, u8 *r_val)
158267
{
@@ -362,15 +471,21 @@ int handle_misaligned_load(struct pt_regs *regs)
362471
return -1;
363472
}
364473

474+
if (!IS_ENABLED(CONFIG_FPU) && fp)
475+
return -EOPNOTSUPP;
476+
365477
val.data_u64 = 0;
366478
for (i = 0; i < len; i++) {
367479
if (load_u8(regs, (void *)(addr + i), &val.data_bytes[i]))
368480
return -1;
369481
}
370482

371-
if (fp)
372-
return -1;
373-
SET_RD(insn, regs, val.data_ulong << shift >> shift);
483+
if (!fp)
484+
SET_RD(insn, regs, val.data_ulong << shift >> shift);
485+
else if (len == 8)
486+
set_f64_rd(insn, regs, val.data_u64);
487+
else
488+
set_f32_rd(insn, regs, val.data_ulong);
374489

375490
regs->epc = epc + INSN_LEN(insn);
376491

@@ -383,7 +498,7 @@ int handle_misaligned_store(struct pt_regs *regs)
383498
unsigned long epc = regs->epc;
384499
unsigned long insn;
385500
unsigned long addr = regs->badaddr;
386-
int i, len = 0;
501+
int i, len = 0, fp = 0;
387502

388503
perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr);
389504

@@ -400,6 +515,14 @@ int handle_misaligned_store(struct pt_regs *regs)
400515
} else if ((insn & INSN_MASK_SD) == INSN_MATCH_SD) {
401516
len = 8;
402517
#endif
518+
} else if ((insn & INSN_MASK_FSD) == INSN_MATCH_FSD) {
519+
fp = 1;
520+
len = 8;
521+
val.data_u64 = GET_F64_RS2(insn, regs);
522+
} else if ((insn & INSN_MASK_FSW) == INSN_MATCH_FSW) {
523+
fp = 1;
524+
len = 4;
525+
val.data_ulong = GET_F32_RS2(insn, regs);
403526
} else if ((insn & INSN_MASK_SH) == INSN_MATCH_SH) {
404527
len = 2;
405528
#if defined(CONFIG_64BIT)
@@ -418,11 +541,32 @@ int handle_misaligned_store(struct pt_regs *regs)
418541
((insn >> SH_RD) & 0x1f)) {
419542
len = 4;
420543
val.data_ulong = GET_RS2C(insn, regs);
544+
} else if ((insn & INSN_MASK_C_FSD) == INSN_MATCH_C_FSD) {
545+
fp = 1;
546+
len = 8;
547+
val.data_u64 = GET_F64_RS2S(insn, regs);
548+
} else if ((insn & INSN_MASK_C_FSDSP) == INSN_MATCH_C_FSDSP) {
549+
fp = 1;
550+
len = 8;
551+
val.data_u64 = GET_F64_RS2C(insn, regs);
552+
#if !defined(CONFIG_64BIT)
553+
} else if ((insn & INSN_MASK_C_FSW) == INSN_MATCH_C_FSW) {
554+
fp = 1;
555+
len = 4;
556+
val.data_ulong = GET_F32_RS2S(insn, regs);
557+
} else if ((insn & INSN_MASK_C_FSWSP) == INSN_MATCH_C_FSWSP) {
558+
fp = 1;
559+
len = 4;
560+
val.data_ulong = GET_F32_RS2C(insn, regs);
561+
#endif
421562
} else {
422563
regs->epc = epc;
423564
return -1;
424565
}
425566

567+
if (!IS_ENABLED(CONFIG_FPU) && fp)
568+
return -EOPNOTSUPP;
569+
426570
for (i = 0; i < len; i++) {
427571
if (store_u8(regs, (void *)(addr + i), val.data_bytes[i]))
428572
return -1;

0 commit comments

Comments
 (0)