Skip to content

Commit 5a5e9c0

Browse files
committed
accel: Add Arm Ethos-U NPU driver
Add a driver for Arm Ethos-U65/U85 NPUs. The Ethos-U NPU has a relatively simple interface with a single command stream to describe buffers, operation settings, and network operations. It supports up to 8 memory regions (though there are no h/w bounds on a region). The Ethos NPUs are designed to use an SRAM for scratch memory. Region 2 is reserved for SRAM (like the downstream driver stack and compiler). Userspace doesn't need access to the SRAM. The h/w has neither an MMU nor an external IOMMU and is a DMA engine which can read and write anywhere in memory without h/w bounds checks. User-submitted command streams must be validated against the bounds of the GEM BOs. This is similar to the VC4 design which validates shaders. The job submit is based on the rocket driver for the Rockchip NPU utilizing the GPU scheduler. It is simpler as there's only 1 core rather than 3. Tested on i.MX93 platform (U65) and FVP (U85) with Mesa Teflon support. Acked-by: Thomas Zimmermann <tzimmermann@suse.de> Acked-by: Tomeu Vizoso <tomeu@tomeuvizoso.net> Reviewed-by: Frank Li <Frank.Li@nxp.com> Link: https://patch.msgid.link/20251020-ethos-v6-2-ecebc383c4b7@kernel.org Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
1 parent b3e29b6 commit 5a5e9c0

13 files changed

Lines changed: 2188 additions & 0 deletions

File tree

MAINTAINERS

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2017,6 +2017,15 @@ F: arch/arm64/include/asm/arch_timer.h
20172017
F: drivers/clocksource/arm_arch_timer.c
20182018
F: drivers/clocksource/arm_arch_timer_mmio.c
20192019

2020+
ARM ETHOS-U NPU DRIVER
2021+
M: Rob Herring (Arm) <robh@kernel.org>
2022+
M: Tomeu Vizoso <tomeu@tomeuvizoso.net>
2023+
L: dri-devel@lists.freedesktop.org
2024+
S: Supported
2025+
T: git https://gitlab.freedesktop.org/drm/misc/kernel.git
2026+
F: drivers/accel/ethosu/
2027+
F: include/uapi/drm/ethosu_accel.h
2028+
20202029
ARM GENERIC INTERRUPT CONTROLLER DRIVERS
20212030
M: Marc Zyngier <maz@kernel.org>
20222031
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)

drivers/accel/Kconfig

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ menuconfig DRM_ACCEL
2525
and debugfs).
2626

2727
source "drivers/accel/amdxdna/Kconfig"
28+
source "drivers/accel/ethosu/Kconfig"
2829
source "drivers/accel/habanalabs/Kconfig"
2930
source "drivers/accel/ivpu/Kconfig"
3031
source "drivers/accel/qaic/Kconfig"

drivers/accel/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# SPDX-License-Identifier: GPL-2.0-only
22

33
obj-$(CONFIG_DRM_ACCEL_AMDXDNA) += amdxdna/
4+
obj-$(CONFIG_DRM_ACCEL_ARM_ETHOSU) += ethosu/
45
obj-$(CONFIG_DRM_ACCEL_HABANALABS) += habanalabs/
56
obj-$(CONFIG_DRM_ACCEL_IVPU) += ivpu/
67
obj-$(CONFIG_DRM_ACCEL_QAIC) += qaic/

drivers/accel/ethosu/Kconfig

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# SPDX-License-Identifier: GPL-2.0-only
2+
3+
config DRM_ACCEL_ARM_ETHOSU
4+
tristate "Arm Ethos-U65/U85 NPU"
5+
depends on HAS_IOMEM
6+
depends on DRM_ACCEL
7+
select DRM_GEM_DMA_HELPER
8+
select DRM_SCHED
9+
select GENERIC_ALLOCATOR
10+
help
11+
Enables driver for Arm Ethos-U65/U85 NPUs

drivers/accel/ethosu/Makefile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# SPDX-License-Identifier: GPL-2.0-only
2+
3+
obj-$(CONFIG_DRM_ACCEL_ARM_ETHOSU) := ethosu.o
4+
ethosu-y += ethosu_drv.o ethosu_gem.o ethosu_job.o
Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
2+
/* Copyright 2025 Arm, Ltd. */
3+
4+
#ifndef __ETHOSU_DEVICE_H__
5+
#define __ETHOSU_DEVICE_H__
6+
7+
#include <linux/bitfield.h>
8+
#include <linux/bits.h>
9+
#include <linux/types.h>
10+
11+
#include <drm/drm_device.h>
12+
#include <drm/gpu_scheduler.h>
13+
14+
#include <drm/ethosu_accel.h>
15+
16+
struct clk;
17+
struct gen_pool;
18+
19+
/*
 * NPU_REG_* constants; presumably byte offsets into the register mapping
 * (confirm against the register accessors). Offsets 0x0040-0x004c are
 * named twice on purpose: AXILIMITn is tagged U65 and MEM_ATTRn is tagged
 * U85 by the trailing comments below.
 */
#define NPU_REG_ID 0x0000
20+
#define NPU_REG_STATUS 0x0004
21+
#define NPU_REG_CMD 0x0008
22+
#define NPU_REG_RESET 0x000c
23+
#define NPU_REG_QBASE 0x0010
24+
#define NPU_REG_QBASE_HI 0x0014
25+
#define NPU_REG_QREAD 0x0018
26+
#define NPU_REG_QCONFIG 0x001c
27+
#define NPU_REG_QSIZE 0x0020
28+
#define NPU_REG_PROT 0x0024
29+
#define NPU_REG_CONFIG 0x0028
30+
#define NPU_REG_REGIONCFG 0x003c
31+
#define NPU_REG_AXILIMIT0 0x0040 // U65
32+
#define NPU_REG_AXILIMIT1 0x0044 // U65
33+
#define NPU_REG_AXILIMIT2 0x0048 // U65
34+
#define NPU_REG_AXILIMIT3 0x004c // U65
35+
#define NPU_REG_MEM_ATTR0 0x0040 // U85
36+
#define NPU_REG_MEM_ATTR1 0x0044 // U85
37+
#define NPU_REG_MEM_ATTR2 0x0048 // U85
38+
#define NPU_REG_MEM_ATTR3 0x004c // U85
39+
#define NPU_REG_AXI_SRAM 0x0050 // U85
40+
#define NPU_REG_AXI_EXT 0x0054 // U85
41+
42+
/*
 * Per-region BASEP constants: index x selects a pair spaced 8 bytes apart,
 * starting at 0x0080 and 0x0084 (presumably the low and high words of one
 * region's base address). NPU_BASEP_REGION_MAX is the number of regions.
 */
#define NPU_REG_BASEP(x) (0x0080 + (x) * 8)
43+
#define NPU_REG_BASEP_HI(x) (0x0084 + (x) * 8)
44+
#define NPU_BASEP_REGION_MAX 8
45+
46+
/*
 * ID_* bit fields. ethosu_is_u65() applies ID_ARCH_MAJOR_MASK to
 * npu_info.id; presumably all of these decode NPU_REG_ID (confirm).
 */
#define ID_ARCH_MAJOR_MASK GENMASK(31, 28)
47+
#define ID_ARCH_MINOR_MASK GENMASK(27, 20)
48+
#define ID_ARCH_PATCH_MASK GENMASK(19, 16)
49+
#define ID_VER_MAJOR_MASK GENMASK(11, 8)
50+
#define ID_VER_MINOR_MASK GENMASK(7, 4)
51+
52+
/* CONFIG_* bit fields; presumably fields of NPU_REG_CONFIG (confirm). */
#define CONFIG_MACS_PER_CC_MASK GENMASK(3, 0)
53+
#define CONFIG_CMD_STREAM_VER_MASK GENMASK(7, 4)
54+
55+
/* STATUS_* flag bits; presumably bits of NPU_REG_STATUS (confirm). */
#define STATUS_STATE_RUNNING BIT(0)
56+
#define STATUS_IRQ_RAISED BIT(1)
57+
#define STATUS_BUS_STATUS BIT(2)
58+
#define STATUS_RESET_STATUS BIT(3)
59+
#define STATUS_CMD_PARSE_ERR BIT(4)
60+
#define STATUS_CMD_END_REACHED BIT(5)
61+
62+
/* CMD_* flag bits; presumably bits of NPU_REG_CMD (confirm). */
#define CMD_CLEAR_IRQ BIT(1)
63+
#define CMD_TRANSITION_TO_RUN BIT(0)
64+
65+
/* RESET_* flag bits; presumably bits of NPU_REG_RESET (confirm). */
#define RESET_PENDING_CSL BIT(1)
66+
#define RESET_PENDING_CPL BIT(0)
67+
68+
/* PROT_* flag bit; presumably a bit of NPU_REG_PROT (confirm). */
#define PROT_ACTIVE_CSL BIT(1)
69+
70+
/*
 * enum ethosu_cmds - NPU command identifiers.
 *
 * The values fall into three visible ranges:
 *   0x2    - 0x10    NPU_OP_*  operations
 *   0x100  - 0x18f   NPU_SET_* settings (padding, sizes, precision, regions)
 *   0x4000 - 0x4095  NPU_SET_* settings naming bases, strides and lengths
 *
 * Presumably these are the opcodes found in user-submitted command streams
 * (confirm against the command-stream validation code).
 *
 * Two values carry two names each: 0x4092 (SCALE1_BASE / WEIGHT2_BASE) and
 * 0x4093 (SCALE1_LENGTH / WEIGHT2_LENGTH). Because of the duplicates, only
 * one name of each pair can be used as a case label in a given switch.
 */
enum ethosu_cmds {
71+
NPU_OP_CONV = 0x2,
72+
NPU_OP_DEPTHWISE = 0x3,
73+
NPU_OP_POOL = 0x5,
74+
NPU_OP_ELEMENTWISE = 0x6,
75+
NPU_OP_RESIZE = 0x7, // U85 only
76+
NPU_OP_DMA_START = 0x10,
77+
NPU_SET_IFM_PAD_TOP = 0x100,
78+
NPU_SET_IFM_PAD_LEFT = 0x101,
79+
NPU_SET_IFM_PAD_RIGHT = 0x102,
80+
NPU_SET_IFM_PAD_BOTTOM = 0x103,
81+
NPU_SET_IFM_DEPTH_M1 = 0x104,
82+
NPU_SET_IFM_PRECISION = 0x105,
83+
NPU_SET_IFM_BROADCAST = 0x108,
84+
NPU_SET_IFM_WIDTH0_M1 = 0x10a,
85+
NPU_SET_IFM_HEIGHT0_M1 = 0x10b,
86+
NPU_SET_IFM_HEIGHT1_M1 = 0x10c,
87+
NPU_SET_IFM_REGION = 0x10f,
88+
NPU_SET_OFM_WIDTH_M1 = 0x111,
89+
NPU_SET_OFM_HEIGHT_M1 = 0x112,
90+
NPU_SET_OFM_DEPTH_M1 = 0x113,
91+
NPU_SET_OFM_PRECISION = 0x114,
92+
NPU_SET_OFM_WIDTH0_M1 = 0x11a,
93+
NPU_SET_OFM_HEIGHT0_M1 = 0x11b,
94+
NPU_SET_OFM_HEIGHT1_M1 = 0x11c,
95+
NPU_SET_OFM_REGION = 0x11f,
96+
NPU_SET_KERNEL_WIDTH_M1 = 0x120,
97+
NPU_SET_KERNEL_HEIGHT_M1 = 0x121,
98+
NPU_SET_KERNEL_STRIDE = 0x122,
99+
NPU_SET_WEIGHT_REGION = 0x128,
100+
NPU_SET_SCALE_REGION = 0x129,
101+
NPU_SET_DMA0_SRC_REGION = 0x130,
102+
NPU_SET_DMA0_DST_REGION = 0x131,
103+
NPU_SET_DMA0_SIZE0 = 0x132,
104+
NPU_SET_DMA0_SIZE1 = 0x133,
105+
NPU_SET_IFM2_BROADCAST = 0x180,
106+
NPU_SET_IFM2_PRECISION = 0x185,
107+
NPU_SET_IFM2_WIDTH0_M1 = 0x18a,
108+
NPU_SET_IFM2_HEIGHT0_M1 = 0x18b,
109+
NPU_SET_IFM2_HEIGHT1_M1 = 0x18c,
110+
NPU_SET_IFM2_REGION = 0x18f,
111+
NPU_SET_IFM_BASE0 = 0x4000,
112+
NPU_SET_IFM_BASE1 = 0x4001,
113+
NPU_SET_IFM_BASE2 = 0x4002,
114+
NPU_SET_IFM_BASE3 = 0x4003,
115+
NPU_SET_IFM_STRIDE_X = 0x4004,
116+
NPU_SET_IFM_STRIDE_Y = 0x4005,
117+
NPU_SET_IFM_STRIDE_C = 0x4006,
118+
NPU_SET_OFM_BASE0 = 0x4010,
119+
NPU_SET_OFM_BASE1 = 0x4011,
120+
NPU_SET_OFM_BASE2 = 0x4012,
121+
NPU_SET_OFM_BASE3 = 0x4013,
122+
NPU_SET_OFM_STRIDE_X = 0x4014,
123+
NPU_SET_OFM_STRIDE_Y = 0x4015,
124+
NPU_SET_OFM_STRIDE_C = 0x4016,
125+
NPU_SET_WEIGHT_BASE = 0x4020,
126+
NPU_SET_WEIGHT_LENGTH = 0x4021,
127+
NPU_SET_SCALE_BASE = 0x4022,
128+
NPU_SET_SCALE_LENGTH = 0x4023,
129+
NPU_SET_DMA0_SRC = 0x4030,
130+
NPU_SET_DMA0_DST = 0x4031,
131+
NPU_SET_DMA0_LEN = 0x4032,
132+
NPU_SET_DMA0_SRC_STRIDE0 = 0x4033,
133+
NPU_SET_DMA0_SRC_STRIDE1 = 0x4034,
134+
NPU_SET_DMA0_DST_STRIDE0 = 0x4035,
135+
NPU_SET_DMA0_DST_STRIDE1 = 0x4036,
136+
NPU_SET_IFM2_BASE0 = 0x4080,
137+
NPU_SET_IFM2_BASE1 = 0x4081,
138+
NPU_SET_IFM2_BASE2 = 0x4082,
139+
NPU_SET_IFM2_BASE3 = 0x4083,
140+
NPU_SET_IFM2_STRIDE_X = 0x4084,
141+
NPU_SET_IFM2_STRIDE_Y = 0x4085,
142+
NPU_SET_IFM2_STRIDE_C = 0x4086,
143+
NPU_SET_WEIGHT1_BASE = 0x4090,
144+
NPU_SET_WEIGHT1_LENGTH = 0x4091,
145+
NPU_SET_SCALE1_BASE = 0x4092, // same value as NPU_SET_WEIGHT2_BASE
146+
NPU_SET_WEIGHT2_BASE = 0x4092, // same value as NPU_SET_SCALE1_BASE
147+
NPU_SET_SCALE1_LENGTH = 0x4093, // same value as NPU_SET_WEIGHT2_LENGTH
148+
NPU_SET_WEIGHT2_LENGTH = 0x4093, // same value as NPU_SET_SCALE1_LENGTH
149+
NPU_SET_WEIGHT3_BASE = 0x4094,
150+
NPU_SET_WEIGHT3_LENGTH = 0x4095,
151+
};
152+
153+
/*
 * Index of the memory region reserved for SRAM. The commit description
 * states that region 2 is reserved for SRAM, like the downstream driver
 * stack and compiler.
 */
#define ETHOSU_SRAM_REGION 2 /* Matching Vela compiler */
154+
155+
/**
156+
* struct ethosu_device - Ethosu device
* @sram: I/O-memory pointer for the SRAM (presumably the CPU mapping of the
*        scratch SRAM; confirm against the probe code).
* @srampool: gen_pool handle (presumably the pool @sram is allocated from;
*            confirm against the probe code).
* @sramphys: DMA address associated with the SRAM (presumably the address
*            programmed into the NPU for the SRAM region; TODO confirm).
* @clks: clk_bulk_data pointer (presumably an array of @num_clks entries).
* @num_clks: Clock count (presumably the number of entries in @clks).
* @irq: Presumably the NPU interrupt number; verify against the probe code.
* @npu_info: NPU information; its id field is decoded by ethosu_is_u65().
* @in_flight_job: Job pointer covered by @job_lock.
* @job_lock: Mutex for @in_flight_job and ethosu_job_hw_submit().
* @fence_lock: Spinlock for dma_fence.
* @sched: GPU scheduler instance embedded in the device.
* @sched_lock: Mutex for ethosu_job_do_push().
* @fence_context: Presumably the dma_fence context of this device; confirm.
* @emit_seqno: Presumably the fence sequence counter; confirm.
157+
*/
158+
struct ethosu_device {
159+
/** @base: Base drm_device. */
160+
struct drm_device base;
161+
162+
/** @regs: CPU mapping of the registers. */
163+
void __iomem *regs;
164+
165+
void __iomem *sram;
166+
struct gen_pool *srampool;
167+
dma_addr_t sramphys;
168+
169+
struct clk_bulk_data *clks;
170+
int num_clks;
171+
int irq;
172+
173+
struct drm_ethosu_npu_info npu_info;
174+
175+
struct ethosu_job *in_flight_job;
176+
/* For in_flight_job and ethosu_job_hw_submit() */
177+
struct mutex job_lock;
178+
179+
/* For dma_fence */
180+
spinlock_t fence_lock;
181+
182+
struct drm_gpu_scheduler sched;
183+
/* For ethosu_job_do_push() */
184+
struct mutex sched_lock;
185+
u64 fence_context;
186+
u64 emit_seqno;
187+
};
188+
189+
/*
 * to_ethosu_device() - map a pointer to the embedded drm_device (the
 * "base" member) back to the struct ethosu_device that contains it.
 *
 * NOTE(review): the outer cast looks redundant if container_of() already
 * yields a struct ethosu_device pointer; confirm before removing it.
 */
#define to_ethosu_device(drm_dev) \
190+
((struct ethosu_device *)container_of(drm_dev, struct ethosu_device, base))
191+
192+
/**
 * ethosu_is_u65() - Test whether the device reports architecture major 1.
 * @ethosudev: Device whose npu_info.id value is examined.
 *
 * Extracts the ID_ARCH_MAJOR_MASK field (bits 31:28) from npu_info.id and
 * compares it with 1. As the function name indicates, the driver treats
 * that value as identifying an Ethos-U65.
 *
 * Return: true if the extracted field equals 1, false otherwise.
 */
static inline bool ethosu_is_u65(const struct ethosu_device *ethosudev)
193+
{
194+
return FIELD_GET(ID_ARCH_MAJOR_MASK, ethosudev->npu_info.id) == 1;
195+
}
196+
197+
#endif

0 commit comments

Comments
 (0)