@@ -209,6 +209,12 @@ static void set_sme_default_vl(int val)
209209 set_default_vl (ARM64_VEC_SME , val );
210210}
211211
212+ static void sme_free (struct task_struct * );
213+
214+ #else
215+
216+ static inline void sme_free (struct task_struct * t ) { }
217+
212218#endif
213219
214220DEFINE_PER_CPU (bool , fpsimd_context_busy );
@@ -676,7 +682,7 @@ static void sve_to_fpsimd(struct task_struct *task)
676682 * Return how many bytes of memory are required to store the full SVE
677683 * state for task, given task's currently configured vector length.
678684 */
679- static size_t sve_state_size (struct task_struct const * task )
685+ size_t sve_state_size (struct task_struct const * task )
680686{
681687 unsigned int vl = 0 ;
682688
@@ -818,18 +824,22 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type,
818824 thread_sm_enabled (& task -> thread ))
819825 sve_to_fpsimd (task );
820826
821- if (system_supports_sme () && type == ARM64_VEC_SME )
827+ if (system_supports_sme () && type == ARM64_VEC_SME ) {
822828 task -> thread .svcr &= ~(SYS_SVCR_EL0_SM_MASK |
823829 SYS_SVCR_EL0_ZA_MASK );
830+ clear_thread_flag (TIF_SME );
831+ }
824832
825833 if (task == current )
826834 put_cpu_fpsimd_context ();
827835
828836 /*
829- * Force reallocation of task SVE state to the correct size
830- * on next use:
837+ * Force reallocation of task SVE and SME state to the correct
838+ * size on next use:
831839 */
832840 sve_free (task );
841+ if (system_supports_sme () && type == ARM64_VEC_SME )
842+ sme_free (task );
833843
834844 task_set_vl (task , type , vl );
835845
@@ -1164,12 +1174,43 @@ void __init sve_setup(void)
11641174void fpsimd_release_task (struct task_struct * dead_task )
11651175{
11661176 __sve_free (dead_task );
1177+ sme_free (dead_task );
11671178}
11681179
11691180#endif /* CONFIG_ARM64_SVE */
11701181
11711182#ifdef CONFIG_ARM64_SME
11721183
1184+ /* This will move to uapi/asm/sigcontext.h when signals are implemented */
1185+ #define ZA_SIG_REGS_SIZE (vq ) ((vq * __SVE_VQ_BYTES) * (vq * __SVE_VQ_BYTES))
1186+
1187+ /*
1188+ * Ensure that task->thread.za_state is allocated and sufficiently large.
1189+ *
1190+ * This function should be used only in preparation for replacing
1191+ * task->thread.za_state with new data. The memory is always zeroed
1192+ * here to prevent stale data from showing through: this is done in
1193+ * the interest of testability and predictability: the architecture
1194+ * guarantees that when ZA is enabled it will be zeroed.
1195+ */
1196+ void sme_alloc (struct task_struct * task )
1197+ {
1198+ if (task -> thread .za_state ) {
1199+ memset (task -> thread .za_state , 0 , za_state_size (task ));
1200+ return ;
1201+ }
1202+
1203+ /* This could potentially be up to 64K. */
1204+ task -> thread .za_state =
1205+ kzalloc (za_state_size (task ), GFP_KERNEL );
1206+ }
1207+
1208+ static void sme_free (struct task_struct * task )
1209+ {
1210+ kfree (task -> thread .za_state );
1211+ task -> thread .za_state = NULL ;
1212+ }
1213+
11731214void sme_kernel_enable (const struct arm64_cpu_capabilities * __always_unused p )
11741215{
11751216 /* Set priority for all PEs to architecturally defined minimum */
@@ -1279,6 +1320,29 @@ void __init sme_setup(void)
12791320
12801321#endif /* CONFIG_ARM64_SME */
12811322
1323+ static void sve_init_regs (void )
1324+ {
1325+ /*
1326+ * Convert the FPSIMD state to SVE, zeroing all the state that
1327+ * is not shared with FPSIMD. If (as is likely) the current
1328+ * state is live in the registers then do this there and
1329+ * update our metadata for the current task including
1330+ * disabling the trap, otherwise update our in-memory copy.
1331+ * We are guaranteed to not be in streaming mode, we can only
1332+ * take a SVE trap when not in streaming mode and we can't be
1333+ * in streaming mode when taking a SME trap.
1334+ */
1335+ if (!test_thread_flag (TIF_FOREIGN_FPSTATE )) {
1336+ unsigned long vq_minus_one =
1337+ sve_vq_from_vl (task_get_sve_vl (current )) - 1 ;
1338+ sve_set_vq (vq_minus_one );
1339+ sve_flush_live (true, vq_minus_one );
1340+ fpsimd_bind_task_to_cpu ();
1341+ } else {
1342+ fpsimd_to_sve (current );
1343+ }
1344+ }
1345+
12821346/*
12831347 * Trapped SVE access
12841348 *
@@ -1310,22 +1374,77 @@ void do_sve_acc(unsigned int esr, struct pt_regs *regs)
13101374 WARN_ON (1 ); /* SVE access shouldn't have trapped */
13111375
13121376 /*
1313- * Convert the FPSIMD state to SVE, zeroing all the state that
1314- * is not shared with FPSIMD. If (as is likely) the current
1315- * state is live in the registers then do this there and
1316- * update our metadata for the current task including
1317- * disabling the trap, otherwise update our in-memory copy.
1377+ * Even if the task can have used streaming mode we can only
1378+ * generate SVE access traps in normal SVE mode and
1379+ * transitioning out of streaming mode may discard any
1380+ * streaming mode state. Always clear the high bits to avoid
1381+ * any potential errors tracking what is properly initialised.
1382+ */
1383+ sve_init_regs ();
1384+
1385+ put_cpu_fpsimd_context ();
1386+ }
1387+
1388+ /*
1389+ * Trapped SME access
1390+ *
1391+ * Storage is allocated for the full SVE and SME state, the current
1392+ * FPSIMD register contents are migrated to SVE if SVE is not already
1393+ * active, and the access trap is disabled.
1394+ *
1395+ * TIF_SME should be clear on entry: otherwise, fpsimd_restore_current_state()
1396+ * would have disabled the SME access trap for userspace during
1397+ * ret_to_user, making an SME access trap impossible in that case.
1398+ */
1399+ void do_sme_acc (unsigned int esr , struct pt_regs * regs )
1400+ {
1401+ /* Even if we chose not to use SME, the hardware could still trap: */
1402+ if (unlikely (!system_supports_sme ()) || WARN_ON (is_compat_task ())) {
1403+ force_signal_inject (SIGILL , ILL_ILLOPC , regs -> pc , 0 );
1404+ return ;
1405+ }
1406+
1407+ /*
1408+ * If this is not a trap due to SME being disabled then something
1409+ * is being used in the wrong mode, report as SIGILL.
13181410 */
1411+ if (ESR_ELx_ISS (esr ) != ESR_ELx_SME_ISS_SME_DISABLED ) {
1412+ force_signal_inject (SIGILL , ILL_ILLOPC , regs -> pc , 0 );
1413+ return ;
1414+ }
1415+
1416+ sve_alloc (current );
1417+ sme_alloc (current );
1418+ if (!current -> thread .sve_state || !current -> thread .za_state ) {
1419+ force_sig (SIGKILL );
1420+ return ;
1421+ }
1422+
1423+ get_cpu_fpsimd_context ();
1424+
1425+ /* With TIF_SME userspace shouldn't generate any traps */
1426+ if (test_and_set_thread_flag (TIF_SME ))
1427+ WARN_ON (1 );
1428+
13191429 if (!test_thread_flag (TIF_FOREIGN_FPSTATE )) {
13201430 unsigned long vq_minus_one =
1321- sve_vq_from_vl (task_get_sve_vl (current )) - 1 ;
1322- sve_set_vq (vq_minus_one );
1323- sve_flush_live (true, vq_minus_one );
1431+ sve_vq_from_vl (task_get_sme_vl (current )) - 1 ;
1432+ sme_set_vq (vq_minus_one );
1433+
13241434 fpsimd_bind_task_to_cpu ();
1325- } else {
1326- fpsimd_to_sve (current );
13271435 }
13281436
1437+ /*
1438+ * If SVE was not already active initialise the SVE registers,
1439+ * any non-shared state between the streaming and regular SVE
1440+ * registers is architecturally guaranteed to be zeroed when
1441+ * we enter streaming mode. We do not need to initialize ZA
1442+ * since ZA must be disabled at this point and enabling ZA is
1443+ * architecturally defined to zero ZA.
1444+ */
1445+ if (system_supports_sve () && !test_thread_flag (TIF_SVE ))
1446+ sve_init_regs ();
1447+
13291448 put_cpu_fpsimd_context ();
13301449}
13311450
@@ -1442,8 +1561,12 @@ void fpsimd_flush_thread(void)
14421561 fpsimd_flush_thread_vl (ARM64_VEC_SVE );
14431562 }
14441563
1445- if (system_supports_sme ())
1564+ if (system_supports_sme ()) {
1565+ clear_thread_flag (TIF_SME );
1566+ sme_free (current );
14461567 fpsimd_flush_thread_vl (ARM64_VEC_SME );
1568+ current -> thread .svcr = 0 ;
1569+ }
14471570
14481571 put_cpu_fpsimd_context ();
14491572}
@@ -1493,14 +1616,22 @@ static void fpsimd_bind_task_to_cpu(void)
14931616 last -> svcr = & current -> thread .svcr ;
14941617 current -> thread .fpsimd_cpu = smp_processor_id ();
14951618
1619+ /*
1620+ * Toggle SVE and SME trapping for userspace if needed, these
1621+ * are serialised by ret_to_user().
1622+ */
1623+ if (system_supports_sme ()) {
1624+ if (test_thread_flag (TIF_SME ))
1625+ sme_user_enable ();
1626+ else
1627+ sme_user_disable ();
1628+ }
1629+
14961630 if (system_supports_sve ()) {
1497- /* Toggle SVE trapping for userspace if needed */
14981631 if (test_thread_flag (TIF_SVE ))
14991632 sve_user_enable ();
15001633 else
15011634 sve_user_disable ();
1502-
1503- /* Serialised by exception return to user */
15041635 }
15051636}
15061637
0 commit comments