Skip to content

Commit 3fed7e0

Browse files
Lucas Weiwilldeacon
authored andcommitted
arm64: errata: Workaround for SI L1 downstream coherency issue
When software issues a Cache Maintenance Operation (CMO) targeting a dirty cache line, the CPU and DSU cluster may optimize the operation by combining the CopyBack Write and CMO into a single combined CopyBack Write plus CMO transaction presented to the interconnect (MCN). For these combined transactions, the MCN splits the operation into two separate transactions, one Write and one CMO, and then propagates the write and optionally the CMO to the downstream memory system or external Point of Serialization (PoS). However, the MCN may return an early CompCMO response to the DSU cluster before the corresponding Write and CMO transactions have completed at the external PoS or downstream memory. As a result, stale data may be observed by external observers that are directly connected to the external PoS or downstream memory. This erratum affects any system topology in which the following conditions apply: - The Point of Serialization (PoS) is located downstream of the interconnect. - A downstream observer accesses memory directly, bypassing the interconnect. Conditions: This erratum occurs only when all of the following conditions are met: 1. Software executes a data cache maintenance operation, specifically, a clean or clean&invalidate by virtual address (DC CVAC or DC CIVAC), that hits on unique dirty data in the CPU or DSU cache. This results in a combined CopyBack and CMO being issued to the interconnect. 2. The interconnect splits the combined transaction into separate Write and CMO transactions and returns an early completion response to the CPU or DSU before the write has completed at the downstream memory or PoS. 3. A downstream observer accesses the affected memory address after the early completion response is issued but before the actual memory write has completed. This allows the observer to read stale data that has not yet been updated at the PoS or downstream memory. 
The workaround issues a second loop of CMOs over the same virtual address range for operations that meet the erratum conditions, so that execution waits until the cached data has been cleaned to the PoC. This implementation reduces the performance penalty compared to simply duplicating each original CMO. Signed-off-by: Lucas Wei <lucaswei@google.com> Signed-off-by: Will Deacon <will@kernel.org>
1 parent 8f0b4cc commit 3fed7e0

5 files changed

Lines changed: 62 additions & 0 deletions

File tree

Documentation/arch/arm64/silicon-errata.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,7 @@ stable kernels.
212212
+----------------+-----------------+-----------------+-----------------------------+
213213
| ARM | GIC-700 | #2941627 | ARM64_ERRATUM_2941627 |
214214
+----------------+-----------------+-----------------+-----------------------------+
215+
| ARM | SI L1 | #4311569 | ARM64_ERRATUM_4311569 |
215216
+----------------+-----------------+-----------------+-----------------------------+
216217
| Broadcom | Brahma-B53 | N/A | ARM64_ERRATUM_845719 |
217218
+----------------+-----------------+-----------------+-----------------------------+

arch/arm64/Kconfig

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1155,6 +1155,25 @@ config ARM64_ERRATUM_3194386
11551155

11561156
If unsure, say Y.
11571157

1158+
config ARM64_ERRATUM_4311569
1159+
bool "SI L1: 4311569: workaround for premature CMO completion erratum"
1160+
default y
1161+
help
1162+
This option adds the workaround for ARM SI L1 erratum 4311569.
1163+
1164+
The erratum of SI L1 can cause an early response to a combined write
1165+
and cache maintenance operation (WR+CMO) before the operation is fully
1166+
completed at the Point of Serialization (PoS).
1167+
This can result in a non-I/O coherent agent observing stale data,
1168+
potentially leading to system instability or incorrect behavior.
1169+
1170+
Enabling this option implements a software workaround by inserting a
1171+
second loop of Cache Maintenance Operations (CMOs) immediately after the
1172+
first loop of CMOs completes. This ensures that the data is correctly serialized
1173+
before the buffer is handed off to a non-coherent agent.
1174+
1175+
If unsure, say Y.
1176+
11581177
config CAVIUM_ERRATUM_22375
11591178
bool "Cavium erratum 22375, 24313"
11601179
default y

arch/arm64/include/asm/assembler.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -381,6 +381,9 @@ alternative_endif
381381
.macro dcache_by_myline_op op, domain, start, end, linesz, tmp, fixup
382382
sub \tmp, \linesz, #1
383383
bic \start, \start, \tmp
384+
alternative_if ARM64_WORKAROUND_4311569
385+
mov \tmp, \start
386+
alternative_else_nop_endif
384387
.Ldcache_op\@:
385388
.ifc \op, cvau
386389
__dcache_op_workaround_clean_cache \op, \start
@@ -402,6 +405,13 @@ alternative_endif
402405
add \start, \start, \linesz
403406
cmp \start, \end
404407
b.lo .Ldcache_op\@
408+
alternative_if ARM64_WORKAROUND_4311569
409+
.ifnc \op, cvau
410+
mov \start, \tmp
411+
mov \tmp, xzr
412+
cbnz \start, .Ldcache_op\@
413+
.endif
414+
alternative_else_nop_endif
405415
dsb \domain
406416

407417
_cond_uaccess_extable .Ldcache_op\@, \fixup

arch/arm64/kernel/cpu_errata.c

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,30 @@ has_mismatched_cache_type(const struct arm64_cpu_capabilities *entry,
141141
return (ctr_real != sys) && (ctr_raw != sys);
142142
}
143143

144+
#ifdef CONFIG_ARM64_ERRATUM_4311569
145+
static DEFINE_STATIC_KEY_FALSE(arm_si_l1_workaround_4311569);
146+
/*
 * Handler for the "arm_si_l1_workaround_4311569" early parameter: turns on the
 * arm_si_l1_workaround_4311569 static key and logs that the workaround is being
 * enabled. @arg is not examined; the function always returns 0.
 */
static int __init early_arm_si_l1_workaround_4311569_cfg(char *arg)
147+
{
148+
static_branch_enable(&arm_si_l1_workaround_4311569);
149+
pr_info("Enabling cache maintenance workaround for ARM SI-L1 erratum 4311569\n");
150+
151+
return 0;
152+
}
153+
/* Register the handler above for the "arm_si_l1_workaround_4311569" parameter. */
early_param("arm_si_l1_workaround_4311569", early_arm_si_l1_workaround_4311569_cfg);
154+
155+
/*
156+
* We have some earlier use cases to call cache maintenance operation functions, for example,
157+
* dcache_inval_poc() and dcache_clean_poc() in head.S, before making the decision to turn on this
158+
* workaround. Since the scope of this workaround is limited to non-coherent DMA agents, it's
159+
* safe to have the workaround off by default.
*
* Capability match callback: returns true only when the arm_si_l1_workaround_4311569 static
* key has been enabled. Neither @entry nor @scope is consulted.
160+
*/
161+
static bool
162+
need_arm_si_l1_workaround_4311569(const struct arm64_cpu_capabilities *entry, int scope)
163+
{
164+
return static_branch_unlikely(&arm_si_l1_workaround_4311569);
165+
}
166+
#endif
167+
144168
static void
145169
cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *cap)
146170
{
@@ -870,6 +894,13 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
870894
ERRATA_MIDR_RANGE_LIST(erratum_spec_ssbs_list),
871895
},
872896
#endif
897+
#ifdef CONFIG_ARM64_ERRATUM_4311569
898+
{
899+
.capability = ARM64_WORKAROUND_4311569,
900+
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
901+
.matches = need_arm_si_l1_workaround_4311569,
902+
},
903+
#endif
873904
#ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD
874905
{
875906
.desc = "ARM errata 2966298, 3117295",

arch/arm64/tools/cpucaps

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ WORKAROUND_2077057
103103
WORKAROUND_2457168
104104
WORKAROUND_2645198
105105
WORKAROUND_2658417
106+
WORKAROUND_4311569
106107
WORKAROUND_AMPERE_AC03_CPU_38
107108
WORKAROUND_AMPERE_AC04_CPU_23
108109
WORKAROUND_TRBE_OVERWRITE_FILL_MODE

0 commit comments

Comments
 (0)