Skip to content

Commit 62a31d6

Browse files
evangreenpalmer-dabbelt
authored andcommitted
RISC-V: hwprobe: Support probing of misaligned access performance
This allows userspace to select various routines to use based on the performance of misaligned access on the target hardware. Rather than adding DT bindings, this change taps into the alternatives mechanism used to probe CPU errata. Add a new function pointer alongside the vendor-specific errata_patch_func() that probes for desirable errata (otherwise known as "features"). Unlike the errata_patch_func(), this function is called on each CPU as it comes up, so it can save feature information per-CPU. The T-head C906 has fast unaligned access, both as defined by GCC [1], and in performing a basic benchmark, which determined that byte copies are >50% slower than a misaligned word copy of the same data size (source for this test at [2]): bytecopy size f000 count 50000 offset 0 took 31664899 us wordcopy size f000 count 50000 offset 0 took 5180919 us wordcopy size f000 count 50000 offset 1 took 13416949 us [1] https://github.com/gcc-mirror/gcc/blob/master/gcc/config/riscv/riscv.cc#L353 [2] https://pastebin.com/EPXvDHSW Co-developed-by: Palmer Dabbelt <palmer@rivosinc.com> Signed-off-by: Evan Green <evan@rivosinc.com> Reviewed-by: Heiko Stuebner <heiko.stuebner@vrull.eu> Tested-by: Heiko Stuebner <heiko.stuebner@vrull.eu> Reviewed-by: Conor Dooley <conor.dooley@microchip.com> Reviewed-by: Paul Walmsley <paul.walmsley@sifive.com> Link: https://lore.kernel.org/r/20230407231103.2622178-5-evan@rivosinc.com Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
1 parent 00e76e2 commit 62a31d6

10 files changed

Lines changed: 97 additions & 1 deletion

File tree

Documentation/riscv/hwprobe.rst

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,3 +63,24 @@ The following keys are defined:
6363

6464
* :c:macro:`RISCV_HWPROBE_IMA_C`: The C extension is supported, as defined
6565
by version 2.2 of the RISC-V ISA manual.
66+
67+
* :c:macro:`RISCV_HWPROBE_KEY_CPUPERF_0`: A bitmask that contains performance
68+
information about the selected set of processors.
69+
70+
* :c:macro:`RISCV_HWPROBE_MISALIGNED_UNKNOWN`: The performance of misaligned
71+
accesses is unknown.
72+
73+
* :c:macro:`RISCV_HWPROBE_MISALIGNED_EMULATED`: Misaligned accesses are
74+
emulated via software, either in or below the kernel. These accesses are
75+
always extremely slow.
76+
77+
* :c:macro:`RISCV_HWPROBE_MISALIGNED_SLOW`: Misaligned accesses are supported
78+
in hardware, but are slower than the corresponding aligned access
79+
sequences.
80+
81+
* :c:macro:`RISCV_HWPROBE_MISALIGNED_FAST`: Misaligned accesses are supported
82+
in hardware and are faster than the corresponding aligned access
83+
sequences.
84+
85+
* :c:macro:`RISCV_HWPROBE_MISALIGNED_UNSUPPORTED`: Misaligned accesses are
86+
not supported at all and will generate a misaligned address fault.

arch/riscv/errata/thead/errata.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,9 @@
1111
#include <linux/uaccess.h>
1212
#include <asm/alternative.h>
1313
#include <asm/cacheflush.h>
14+
#include <asm/cpufeature.h>
1415
#include <asm/errata_list.h>
16+
#include <asm/hwprobe.h>
1517
#include <asm/patch.h>
1618
#include <asm/vendorid_list.h>
1719

@@ -115,3 +117,11 @@ void __init_or_module thead_errata_patch_func(struct alt_entry *begin, struct al
115117
if (stage == RISCV_ALTERNATIVES_EARLY_BOOT)
116118
local_flush_icache_all();
117119
}
120+
121+
/*
 * T-Head feature probe hook.
 *
 * Records RISCV_HWPROBE_MISALIGNED_FAST in the per-CPU
 * misaligned_access_speed slot of @cpu when both @archid and @impid are
 * zero; any other ID pair leaves the slot untouched.
 *
 * NOTE(review): presumably the all-zero ID pair identifies the C906 core
 * mentioned in the commit message -- confirm against the vendor ID tables.
 */
void __init_or_module thead_feature_probe_func(unsigned int cpu,
122+
unsigned long archid,
123+
unsigned long impid)
124+
{
125+
if ((archid == 0) && (impid == 0))
126+
per_cpu(misaligned_access_speed, cpu) = RISCV_HWPROBE_MISALIGNED_FAST;
127+
}

arch/riscv/include/asm/alternative.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#define ALT_OLD_PTR(a) __ALT_PTR(a, old_offset)
2929
#define ALT_ALT_PTR(a) __ALT_PTR(a, alt_offset)
3030

31+
void __init probe_vendor_features(unsigned int cpu);
3132
void __init apply_boot_alternatives(void);
3233
void __init apply_early_boot_alternatives(void);
3334
void apply_module_alternatives(void *start, size_t length);
@@ -55,11 +56,15 @@ void thead_errata_patch_func(struct alt_entry *begin, struct alt_entry *end,
5556
unsigned long archid, unsigned long impid,
5657
unsigned int stage);
5758

59+
void thead_feature_probe_func(unsigned int cpu, unsigned long archid,
60+
unsigned long impid);
61+
5862
void riscv_cpufeature_patch_func(struct alt_entry *begin, struct alt_entry *end,
5963
unsigned int stage);
6064

6165
#else /* CONFIG_RISCV_ALTERNATIVE */
6266

67+
static inline void probe_vendor_features(unsigned int cpu) { }
6368
static inline void apply_boot_alternatives(void) { }
6469
static inline void apply_early_boot_alternatives(void) { }
6570
static inline void apply_module_alternatives(void *start, size_t length) { }

arch/riscv/include/asm/cpufeature.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,4 +18,6 @@ struct riscv_cpuinfo {
1818

1919
DECLARE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo);
2020

21+
DECLARE_PER_CPU(long, misaligned_access_speed);
22+
2123
#endif

arch/riscv/include/asm/hwprobe.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,6 @@
88

99
#include <uapi/asm/hwprobe.h>
1010

11-
#define RISCV_HWPROBE_MAX_KEY 4
11+
#define RISCV_HWPROBE_MAX_KEY 5
1212

1313
#endif

arch/riscv/include/uapi/asm/hwprobe.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,13 @@ struct riscv_hwprobe {
2525
#define RISCV_HWPROBE_KEY_IMA_EXT_0 4
2626
#define RISCV_HWPROBE_IMA_FD (1 << 0)
2727
#define RISCV_HWPROBE_IMA_C (1 << 1)
28+
#define RISCV_HWPROBE_KEY_CPUPERF_0 5
29+
#define RISCV_HWPROBE_MISALIGNED_UNKNOWN (0 << 0)
30+
#define RISCV_HWPROBE_MISALIGNED_EMULATED (1 << 0)
31+
#define RISCV_HWPROBE_MISALIGNED_SLOW (2 << 0)
32+
#define RISCV_HWPROBE_MISALIGNED_FAST (3 << 0)
33+
#define RISCV_HWPROBE_MISALIGNED_UNSUPPORTED (4 << 0)
34+
#define RISCV_HWPROBE_MISALIGNED_MASK (7 << 0)
2835
/* Increase RISCV_HWPROBE_MAX_KEY when adding items. */
2936

3037
#endif

arch/riscv/kernel/alternative.c

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ struct cpu_manufacturer_info_t {
2727
void (*patch_func)(struct alt_entry *begin, struct alt_entry *end,
2828
unsigned long archid, unsigned long impid,
2929
unsigned int stage);
30+
void (*feature_probe_func)(unsigned int cpu, unsigned long archid,
31+
unsigned long impid);
3032
};
3133

3234
static void __init_or_module riscv_fill_cpu_mfr_info(struct cpu_manufacturer_info_t *cpu_mfr_info)
@@ -41,6 +43,7 @@ static void __init_or_module riscv_fill_cpu_mfr_info(struct cpu_manufacturer_inf
4143
cpu_mfr_info->imp_id = sbi_get_mimpid();
4244
#endif
4345

46+
cpu_mfr_info->feature_probe_func = NULL;
4447
switch (cpu_mfr_info->vendor_id) {
4548
#ifdef CONFIG_ERRATA_SIFIVE
4649
case SIFIVE_VENDOR_ID:
@@ -50,6 +53,7 @@ static void __init_or_module riscv_fill_cpu_mfr_info(struct cpu_manufacturer_inf
5053
#ifdef CONFIG_ERRATA_THEAD
5154
case THEAD_VENDOR_ID:
5255
cpu_mfr_info->patch_func = thead_errata_patch_func;
56+
cpu_mfr_info->feature_probe_func = thead_feature_probe_func;
5357
break;
5458
#endif
5559
default:
@@ -139,6 +143,20 @@ void riscv_alternative_fix_offsets(void *alt_ptr, unsigned int len,
139143
}
140144
}
141145

146+
/*
 * Called on each CPU as it starts.
 *
 * Fills a local cpu_manufacturer_info_t via riscv_fill_cpu_mfr_info() and,
 * if a feature_probe_func was selected for the vendor, calls it with @cpu
 * and the arch/implementation IDs. Does nothing when no probe function is
 * set.
 *
 * NOTE(review): the header prototype is marked __init while this definition
 * is __init_or_module, and smp_callin() also calls this function. Confirm
 * that it can never run after init memory has been released (e.g. on a
 * late CPU bring-up).
 */
147+
void __init_or_module probe_vendor_features(unsigned int cpu)
148+
{
149+
struct cpu_manufacturer_info_t cpu_mfr_info;
150+
151+
riscv_fill_cpu_mfr_info(&cpu_mfr_info);
152+
if (!cpu_mfr_info.feature_probe_func)
153+
return;
154+
155+
cpu_mfr_info.feature_probe_func(cpu,
156+
cpu_mfr_info.arch_id,
157+
cpu_mfr_info.imp_id);
158+
}
159+
142160
/*
143161
* This is called very early in the boot process (directly after we run
144162
* a feature detect on the boot CPU). No need to worry about other CPUs
@@ -193,6 +211,7 @@ void __init apply_boot_alternatives(void)
193211
/* If called on non-boot cpu things could go wrong */
194212
WARN_ON(smp_processor_id() != 0);
195213

214+
probe_vendor_features(0);
196215
_apply_alternatives((struct alt_entry *)__alt_start,
197216
(struct alt_entry *)__alt_end,
198217
RISCV_ALTERNATIVES_BOOT);

arch/riscv/kernel/cpufeature.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@ unsigned long elf_hwcap __read_mostly;
3030
/* Host ISA bitmap */
3131
static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly;
3232

33+
/* Performance information */
34+
DEFINE_PER_CPU(long, misaligned_access_speed);
35+
3336
/**
3437
* riscv_isa_extension_base() - Get base extension word
3538
*

arch/riscv/kernel/smpboot.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,7 @@ asmlinkage __visible void smp_callin(void)
168168
notify_cpu_starting(curr_cpuid);
169169
numa_add_cpu(curr_cpuid);
170170
set_cpu_online(curr_cpuid, 1);
171+
probe_vendor_features(curr_cpuid);
171172

172173
/*
173174
* Remote TLB flushes are ignored while the CPU is offline, so emit

arch/riscv/kernel/sys_riscv.c

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
#include <linux/syscalls.h>
99
#include <asm/cacheflush.h>
10+
#include <asm/cpufeature.h>
1011
#include <asm/hwprobe.h>
1112
#include <asm/sbi.h>
1213
#include <asm/switch_to.h>
@@ -117,6 +118,29 @@ static void hwprobe_arch_id(struct riscv_hwprobe *pair,
117118
pair->value = id;
118119
}
119120

121+
/*
 * Report one misaligned-access performance value for the CPUs in @cpus.
 *
 * Returns the per-CPU misaligned_access_speed value when every CPU in the
 * mask holds the same value, and RISCV_HWPROBE_MISALIGNED_UNKNOWN when the
 * mask is empty or the CPUs disagree.
 */
static u64 hwprobe_misaligned(const struct cpumask *cpus)
122+
{
123+
int cpu;
124+
u64 perf = -1ULL; /* sentinel: no CPU has been examined yet */
125+
126+
for_each_cpu(cpu, cpus) {
127+
int this_perf = per_cpu(misaligned_access_speed, cpu);
128+
129+
if (perf == -1ULL) /* first CPU seeds the candidate answer */
130+
perf = this_perf;
131+
132+
if (perf != this_perf) { /* mixed values: no single answer */
133+
perf = RISCV_HWPROBE_MISALIGNED_UNKNOWN;
134+
break;
135+
}
136+
}
137+
138+
if (perf == -1ULL) /* loop body never ran: @cpus was empty */
139+
return RISCV_HWPROBE_MISALIGNED_UNKNOWN;
140+
141+
return perf;
142+
}
143+
120144
static void hwprobe_one_pair(struct riscv_hwprobe *pair,
121145
const struct cpumask *cpus)
122146
{
@@ -146,6 +170,10 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair,
146170

147171
break;
148172

173+
case RISCV_HWPROBE_KEY_CPUPERF_0:
174+
pair->value = hwprobe_misaligned(cpus);
175+
break;
176+
149177
/*
150178
* For forward compatibility, unknown keys don't fail the whole
151179
* call, but get their element key set to -1 and value set to 0

0 commit comments

Comments
 (0)