Skip to content

Commit 5e64b86

Browse files
broonie authored and catmarinas committed
arm64/sme: Basic enumeration support
This patch introduces basic cpufeature support for discovering the presence of the Scalable Matrix Extension. Signed-off-by: Mark Brown <broonie@kernel.org> Reviewed-by: Catalin Marinas <catalin.marinas@arm.com> Link: https://lore.kernel.org/r/20220419112247.711548-9-broonie@kernel.org Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
1 parent b2cf6a2 commit 5e64b86

10 files changed

Lines changed: 164 additions & 0 deletions

File tree

Documentation/arm64/elf_hwcaps.rst

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,39 @@ HWCAP2_MTE3
264264
Functionality implied by ID_AA64PFR1_EL1.MTE == 0b0011, as described
265265
by Documentation/arm64/memory-tagging-extension.rst.
266266

267+
HWCAP2_SME
268+
269+
Functionality implied by ID_AA64PFR1_EL1.SME == 0b0001, as described
270+
by Documentation/arm64/sme.rst.
271+
272+
HWCAP2_SME_I16I64
273+
274+
Functionality implied by ID_AA64SMFR0_EL1.I16I64 == 0b1111.
275+
276+
HWCAP2_SME_F64F64
277+
278+
Functionality implied by ID_AA64SMFR0_EL1.F64F64 == 0b1.
279+
280+
HWCAP2_SME_I8I32
281+
282+
Functionality implied by ID_AA64SMFR0_EL1.I8I32 == 0b1111.
283+
284+
HWCAP2_SME_F16F32
285+
286+
Functionality implied by ID_AA64SMFR0_EL1.F16F32 == 0b1.
287+
288+
HWCAP2_SME_B16F32
289+
290+
Functionality implied by ID_AA64SMFR0_EL1.B16F32 == 0b1.
291+
292+
HWCAP2_SME_F32F32
293+
294+
Functionality implied by ID_AA64SMFR0_EL1.F32F32 == 0b1.
295+
296+
HWCAP2_SME_FA64
297+
298+
Functionality implied by ID_AA64SMFR0_EL1.FA64 == 0b1.
299+
267300
4. Unused AT_HWCAP bits
268301
-----------------------
269302

arch/arm64/include/asm/cpu.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ struct cpuinfo_arm64 {
5858
u64 reg_id_aa64pfr0;
5959
u64 reg_id_aa64pfr1;
6060
u64 reg_id_aa64zfr0;
61+
u64 reg_id_aa64smfr0;
6162

6263
struct cpuinfo_32bit aarch32;
6364

arch/arm64/include/asm/cpufeature.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -759,6 +759,18 @@ static __always_inline bool system_supports_sve(void)
759759
cpus_have_const_cap(ARM64_SVE);
760760
}
761761

762+
static __always_inline bool system_supports_sme(void)
763+
{
764+
return IS_ENABLED(CONFIG_ARM64_SME) &&
765+
cpus_have_const_cap(ARM64_SME);
766+
}
767+
768+
static __always_inline bool system_supports_fa64(void)
769+
{
770+
return IS_ENABLED(CONFIG_ARM64_SME) &&
771+
cpus_have_const_cap(ARM64_SME_FA64);
772+
}
773+
762774
static __always_inline bool system_supports_cnp(void)
763775
{
764776
return IS_ENABLED(CONFIG_ARM64_CNP) &&

arch/arm64/include/asm/fpsimd.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,8 @@ extern void sve_set_vq(unsigned long vq_minus_1);
7474

7575
struct arm64_cpu_capabilities;
7676
extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused);
77+
extern void sme_kernel_enable(const struct arm64_cpu_capabilities *__unused);
78+
extern void fa64_kernel_enable(const struct arm64_cpu_capabilities *__unused);
7779

7880
extern u64 read_zcr_features(void);
7981

arch/arm64/include/asm/hwcap.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,14 @@
109109
#define KERNEL_HWCAP_AFP __khwcap2_feature(AFP)
110110
#define KERNEL_HWCAP_RPRES __khwcap2_feature(RPRES)
111111
#define KERNEL_HWCAP_MTE3 __khwcap2_feature(MTE3)
112+
#define KERNEL_HWCAP_SME __khwcap2_feature(SME)
113+
#define KERNEL_HWCAP_SME_I16I64 __khwcap2_feature(SME_I16I64)
114+
#define KERNEL_HWCAP_SME_F64F64 __khwcap2_feature(SME_F64F64)
115+
#define KERNEL_HWCAP_SME_I8I32 __khwcap2_feature(SME_I8I32)
116+
#define KERNEL_HWCAP_SME_F16F32 __khwcap2_feature(SME_F16F32)
117+
#define KERNEL_HWCAP_SME_B16F32 __khwcap2_feature(SME_B16F32)
118+
#define KERNEL_HWCAP_SME_F32F32 __khwcap2_feature(SME_F32F32)
119+
#define KERNEL_HWCAP_SME_FA64 __khwcap2_feature(SME_FA64)
112120

113121
/*
114122
* This yields a mask that user programs can use to figure out what

arch/arm64/include/uapi/asm/hwcap.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,5 +79,13 @@
7979
#define HWCAP2_AFP (1 << 20)
8080
#define HWCAP2_RPRES (1 << 21)
8181
#define HWCAP2_MTE3 (1 << 22)
82+
#define HWCAP2_SME (1 << 23)
83+
#define HWCAP2_SME_I16I64 (1 << 24)
84+
#define HWCAP2_SME_F64F64 (1 << 25)
85+
#define HWCAP2_SME_I8I32 (1 << 26)
86+
#define HWCAP2_SME_F16F32 (1 << 27)
87+
#define HWCAP2_SME_B16F32 (1 << 28)
88+
#define HWCAP2_SME_F32F32 (1 << 29)
89+
#define HWCAP2_SME_FA64 (1 << 30)
8290

8391
#endif /* _UAPI__ASM_HWCAP_H */

arch/arm64/kernel/cpufeature.c

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,8 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
261261
};
262262

263263
static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = {
264+
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
265+
FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_SME_SHIFT, 4, 0),
264266
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_MPAMFRAC_SHIFT, 4, 0),
265267
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_RASFRAC_SHIFT, 4, 0),
266268
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_MTE),
@@ -293,6 +295,24 @@ static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = {
293295
ARM64_FTR_END,
294296
};
295297

298+
static const struct arm64_ftr_bits ftr_id_aa64smfr0[] = {
299+
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
300+
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_FA64_SHIFT, 1, 0),
301+
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
302+
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_I16I64_SHIFT, 4, 0),
303+
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
304+
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_F64F64_SHIFT, 1, 0),
305+
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
306+
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_I8I32_SHIFT, 4, 0),
307+
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
308+
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_F16F32_SHIFT, 1, 0),
309+
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
310+
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_B16F32_SHIFT, 1, 0),
311+
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
312+
FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_F32F32_SHIFT, 1, 0),
313+
ARM64_FTR_END,
314+
};
315+
296316
static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
297317
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_ECV_SHIFT, 4, 0),
298318
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_FGT_SHIFT, 4, 0),
@@ -645,6 +665,7 @@ static const struct __ftr_reg_entry {
645665
ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1,
646666
&id_aa64pfr1_override),
647667
ARM64_FTR_REG(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0),
668+
ARM64_FTR_REG(SYS_ID_AA64SMFR0_EL1, ftr_id_aa64smfr0),
648669

649670
/* Op1 = 0, CRn = 0, CRm = 5 */
650671
ARM64_FTR_REG(SYS_ID_AA64DFR0_EL1, ftr_id_aa64dfr0),
@@ -960,6 +981,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
960981
init_cpu_ftr_reg(SYS_ID_AA64PFR0_EL1, info->reg_id_aa64pfr0);
961982
init_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1);
962983
init_cpu_ftr_reg(SYS_ID_AA64ZFR0_EL1, info->reg_id_aa64zfr0);
984+
init_cpu_ftr_reg(SYS_ID_AA64SMFR0_EL1, info->reg_id_aa64smfr0);
963985

964986
if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0))
965987
init_32bit_cpu_features(&info->aarch32);
@@ -2442,6 +2464,33 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
24422464
.matches = has_cpuid_feature,
24432465
.min_field_value = 1,
24442466
},
2467+
#ifdef CONFIG_ARM64_SME
2468+
{
2469+
.desc = "Scalable Matrix Extension",
2470+
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
2471+
.capability = ARM64_SME,
2472+
.sys_reg = SYS_ID_AA64PFR1_EL1,
2473+
.sign = FTR_UNSIGNED,
2474+
.field_pos = ID_AA64PFR1_SME_SHIFT,
2475+
.field_width = 4,
2476+
.min_field_value = ID_AA64PFR1_SME,
2477+
.matches = has_cpuid_feature,
2478+
.cpu_enable = sme_kernel_enable,
2479+
},
2480+
/* FA64 should be sorted after the base SME capability */
2481+
{
2482+
.desc = "FA64",
2483+
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
2484+
.capability = ARM64_SME_FA64,
2485+
.sys_reg = SYS_ID_AA64SMFR0_EL1,
2486+
.sign = FTR_UNSIGNED,
2487+
.field_pos = ID_AA64SMFR0_FA64_SHIFT,
2488+
.field_width = 1,
2489+
.min_field_value = ID_AA64SMFR0_FA64,
2490+
.matches = has_cpuid_feature,
2491+
.cpu_enable = fa64_kernel_enable,
2492+
},
2493+
#endif /* CONFIG_ARM64_SME */
24452494
{},
24462495
};
24472496

@@ -2575,6 +2624,16 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
25752624
HWCAP_CAP(SYS_ID_AA64MMFR0_EL1, ID_AA64MMFR0_ECV_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ECV),
25762625
HWCAP_CAP(SYS_ID_AA64MMFR1_EL1, ID_AA64MMFR1_AFP_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AFP),
25772626
HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_RPRES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RPRES),
2627+
#ifdef CONFIG_ARM64_SME
2628+
HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SME_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_SME, CAP_HWCAP, KERNEL_HWCAP_SME),
2629+
HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_FA64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_FA64, CAP_HWCAP, KERNEL_HWCAP_SME_FA64),
2630+
HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_I16I64_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_I16I64, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64),
2631+
HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_F64F64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_F64F64, CAP_HWCAP, KERNEL_HWCAP_SME_F64F64),
2632+
HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_I8I32_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_I8I32, CAP_HWCAP, KERNEL_HWCAP_SME_I8I32),
2633+
HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_F16F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_F16F32, CAP_HWCAP, KERNEL_HWCAP_SME_F16F32),
2634+
HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_B16F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_B16F32, CAP_HWCAP, KERNEL_HWCAP_SME_B16F32),
2635+
HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_F32F32_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_F32F32, CAP_HWCAP, KERNEL_HWCAP_SME_F32F32),
2636+
#endif /* CONFIG_ARM64_SME */
25782637
{},
25792638
};
25802639

arch/arm64/kernel/cpuinfo.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,14 @@ static const char *const hwcap_str[] = {
9898
[KERNEL_HWCAP_AFP] = "afp",
9999
[KERNEL_HWCAP_RPRES] = "rpres",
100100
[KERNEL_HWCAP_MTE3] = "mte3",
101+
[KERNEL_HWCAP_SME] = "sme",
102+
[KERNEL_HWCAP_SME_I16I64] = "smei16i64",
103+
[KERNEL_HWCAP_SME_F64F64] = "smef64f64",
104+
[KERNEL_HWCAP_SME_I8I32] = "smei8i32",
105+
[KERNEL_HWCAP_SME_F16F32] = "smef16f32",
106+
[KERNEL_HWCAP_SME_B16F32] = "smeb16f32",
107+
[KERNEL_HWCAP_SME_F32F32] = "smef32f32",
108+
[KERNEL_HWCAP_SME_FA64] = "smefa64",
101109
};
102110

103111
#ifdef CONFIG_COMPAT
@@ -401,6 +409,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
401409
info->reg_id_aa64pfr0 = read_cpuid(ID_AA64PFR0_EL1);
402410
info->reg_id_aa64pfr1 = read_cpuid(ID_AA64PFR1_EL1);
403411
info->reg_id_aa64zfr0 = read_cpuid(ID_AA64ZFR0_EL1);
412+
info->reg_id_aa64smfr0 = read_cpuid(ID_AA64SMFR0_EL1);
404413

405414
if (id_aa64pfr1_mte(info->reg_id_aa64pfr1))
406415
info->reg_gmid = read_cpuid(GMID_EL1);

arch/arm64/kernel/fpsimd.c

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -993,6 +993,32 @@ void fpsimd_release_task(struct task_struct *dead_task)
993993

994994
#endif /* CONFIG_ARM64_SVE */
995995

996+
#ifdef CONFIG_ARM64_SME
997+
998+
void sme_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
999+
{
1000+
/* Set priority for all PEs to architecturally defined minimum */
1001+
write_sysreg_s(read_sysreg_s(SYS_SMPRI_EL1) & ~SMPRI_EL1_PRIORITY_MASK,
1002+
SYS_SMPRI_EL1);
1003+
1004+
/* Allow SME in kernel */
1005+
write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_SMEN_EL1EN, CPACR_EL1);
1006+
isb();
1007+
}
1008+
1009+
/*
1010+
* This must be called after sme_kernel_enable(), we rely on the
1011+
* feature table being sorted to ensure this.
1012+
*/
1013+
void fa64_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
1014+
{
1015+
/* Allow use of FA64 */
1016+
write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_FA64_MASK,
1017+
SYS_SMCR_EL1);
1018+
}
1019+
1020+
#endif /* CONFIG_ARM64_SME */
1021+
9961022
/*
9971023
* Trapped SVE access
9981024
*
@@ -1538,6 +1564,10 @@ static int __init fpsimd_init(void)
15381564
if (!cpu_have_named_feature(ASIMD))
15391565
pr_notice("Advanced SIMD is not implemented\n");
15401566

1567+
1568+
if (cpu_have_named_feature(SME) && !cpu_have_named_feature(SVE))
1569+
pr_notice("SME is implemented but not SVE\n");
1570+
15411571
return sve_sysctl_init();
15421572
}
15431573
core_initcall(fpsimd_init);

arch/arm64/tools/cpucaps

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@ KVM_PROTECTED_MODE
4343
MISMATCHED_CACHE_TYPE
4444
MTE
4545
MTE_ASYMM
46+
SME
47+
SME_FA64
4648
SPECTRE_V2
4749
SPECTRE_V3A
4850
SPECTRE_V4

0 commit comments

Comments (0)