|
| 1 | +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ |
| 2 | +/* Copyright 2025 Arm, Ltd. */ |
| 3 | + |
| 4 | +#ifndef __ETHOSU_DEVICE_H__ |
| 5 | +#define __ETHOSU_DEVICE_H__ |
| 6 | + |
| 7 | +#include <linux/bitfield.h> |
| 8 | +#include <linux/bits.h> |
| 9 | +#include <linux/types.h> |
| 10 | + |
| 11 | +#include <drm/drm_device.h> |
| 12 | +#include <drm/gpu_scheduler.h> |
| 13 | + |
| 14 | +#include <drm/ethosu_accel.h> |
| 15 | + |
| 16 | +struct clk; /* NOTE(review): not referenced in the visible part of this header; the clks member is struct clk_bulk_data - confirm */ |
| 17 | +struct gen_pool; /* only used through a pointer (ethosu_device.srampool) */ |
| 18 | + |
| 19 | +#define NPU_REG_ID 0x0000 /* NPU_REG_*: register offsets; presumably relative to ethosu_device.regs - confirm */ |
| 20 | +#define NPU_REG_STATUS 0x0004 |
| 21 | +#define NPU_REG_CMD 0x0008 |
| 22 | +#define NPU_REG_RESET 0x000c |
| 23 | +#define NPU_REG_QBASE 0x0010 |
| 24 | +#define NPU_REG_QBASE_HI 0x0014 |
| 25 | +#define NPU_REG_QREAD 0x0018 |
| 26 | +#define NPU_REG_QCONFIG 0x001c |
| 27 | +#define NPU_REG_QSIZE 0x0020 |
| 28 | +#define NPU_REG_PROT 0x0024 |
| 29 | +#define NPU_REG_CONFIG 0x0028 |
| 30 | +#define NPU_REG_REGIONCFG 0x003c |
| 31 | +#define NPU_REG_AXILIMIT0 0x0040 // U65; same offset as NPU_REG_MEM_ATTR0 (U85) |
| 32 | +#define NPU_REG_AXILIMIT1 0x0044 // U65; same offset as NPU_REG_MEM_ATTR1 (U85) |
| 33 | +#define NPU_REG_AXILIMIT2 0x0048 // U65; same offset as NPU_REG_MEM_ATTR2 (U85) |
| 34 | +#define NPU_REG_AXILIMIT3 0x004c // U65; same offset as NPU_REG_MEM_ATTR3 (U85) |
| 35 | +#define NPU_REG_MEM_ATTR0 0x0040 // U85; same offset as NPU_REG_AXILIMIT0 (U65) |
| 36 | +#define NPU_REG_MEM_ATTR1 0x0044 // U85; same offset as NPU_REG_AXILIMIT1 (U65) |
| 37 | +#define NPU_REG_MEM_ATTR2 0x0048 // U85; same offset as NPU_REG_AXILIMIT2 (U65) |
| 38 | +#define NPU_REG_MEM_ATTR3 0x004c // U85; same offset as NPU_REG_AXILIMIT3 (U65) |
| 39 | +#define NPU_REG_AXI_SRAM 0x0050 // U85 |
| 40 | +#define NPU_REG_AXI_EXT 0x0054 // U85 |
| 41 | + |
| 42 | +#define NPU_REG_BASEP(x) (0x0080 + (x) * 8) /* one 8-byte slot per index x, starting at 0x0080 */ |
| 43 | +#define NPU_REG_BASEP_HI(x) (0x0084 + (x) * 8) /* 4 bytes above NPU_REG_BASEP(x) */ |
| 44 | +#define NPU_BASEP_REGION_MAX 8 /* presumably the number of BASEP slots, i.e. x in 0..7 - confirm */ |
| 45 | + |
| 46 | +#define ID_ARCH_MAJOR_MASK GENMASK(31, 28) /* ID_*_MASK: bit fields of the ID word; ethosu_is_u65() applies this one to npu_info.id */ |
| 47 | +#define ID_ARCH_MINOR_MASK GENMASK(27, 20) |
| 48 | +#define ID_ARCH_PATCH_MASK GENMASK(19, 16) |
| 49 | +#define ID_VER_MAJOR_MASK GENMASK(11, 8) |
| 50 | +#define ID_VER_MINOR_MASK GENMASK(7, 4) |
| 51 | + |
| 52 | +#define CONFIG_MACS_PER_CC_MASK GENMASK(3, 0) /* CONFIG_*: presumably fields of NPU_REG_CONFIG - confirm */ |
| 53 | +#define CONFIG_CMD_STREAM_VER_MASK GENMASK(7, 4) |
| 54 | + |
| 55 | +#define STATUS_STATE_RUNNING BIT(0) /* STATUS_*: presumably bits of NPU_REG_STATUS - confirm */ |
| 56 | +#define STATUS_IRQ_RAISED BIT(1) |
| 57 | +#define STATUS_BUS_STATUS BIT(2) |
| 58 | +#define STATUS_RESET_STATUS BIT(3) |
| 59 | +#define STATUS_CMD_PARSE_ERR BIT(4) |
| 60 | +#define STATUS_CMD_END_REACHED BIT(5) |
| 61 | + |
| 62 | +#define CMD_CLEAR_IRQ BIT(1) /* CMD_*: presumably bits of NPU_REG_CMD - confirm */ |
| 63 | +#define CMD_TRANSITION_TO_RUN BIT(0) |
| 64 | + |
| 65 | +#define RESET_PENDING_CSL BIT(1) /* RESET_*: presumably bits of NPU_REG_RESET - confirm */ |
| 66 | +#define RESET_PENDING_CPL BIT(0) |
| 67 | + |
| 68 | +#define PROT_ACTIVE_CSL BIT(1) /* presumably a bit of NPU_REG_PROT - confirm */ |
| 69 | + |
| 70 | +enum ethosu_cmds { /* NPU_OP_* and NPU_SET_* codes; presumably command-stream opcodes - confirm against the Ethos-U documentation */ |
| 71 | + NPU_OP_CONV = 0x2, |
| 72 | + NPU_OP_DEPTHWISE = 0x3, |
| 73 | + NPU_OP_POOL = 0x5, |
| 74 | + NPU_OP_ELEMENTWISE = 0x6, |
| 75 | + NPU_OP_RESIZE = 0x7, // U85 only |
| 76 | + NPU_OP_DMA_START = 0x10, |
| 77 | + NPU_SET_IFM_PAD_TOP = 0x100, |
| 78 | + NPU_SET_IFM_PAD_LEFT = 0x101, |
| 79 | + NPU_SET_IFM_PAD_RIGHT = 0x102, |
| 80 | + NPU_SET_IFM_PAD_BOTTOM = 0x103, |
| 81 | + NPU_SET_IFM_DEPTH_M1 = 0x104, |
| 82 | + NPU_SET_IFM_PRECISION = 0x105, |
| 83 | + NPU_SET_IFM_BROADCAST = 0x108, |
| 84 | + NPU_SET_IFM_WIDTH0_M1 = 0x10a, |
| 85 | + NPU_SET_IFM_HEIGHT0_M1 = 0x10b, |
| 86 | + NPU_SET_IFM_HEIGHT1_M1 = 0x10c, |
| 87 | + NPU_SET_IFM_REGION = 0x10f, |
| 88 | + NPU_SET_OFM_WIDTH_M1 = 0x111, |
| 89 | + NPU_SET_OFM_HEIGHT_M1 = 0x112, |
| 90 | + NPU_SET_OFM_DEPTH_M1 = 0x113, |
| 91 | + NPU_SET_OFM_PRECISION = 0x114, |
| 92 | + NPU_SET_OFM_WIDTH0_M1 = 0x11a, |
| 93 | + NPU_SET_OFM_HEIGHT0_M1 = 0x11b, |
| 94 | + NPU_SET_OFM_HEIGHT1_M1 = 0x11c, |
| 95 | + NPU_SET_OFM_REGION = 0x11f, |
| 96 | + NPU_SET_KERNEL_WIDTH_M1 = 0x120, |
| 97 | + NPU_SET_KERNEL_HEIGHT_M1 = 0x121, |
| 98 | + NPU_SET_KERNEL_STRIDE = 0x122, |
| 99 | + NPU_SET_WEIGHT_REGION = 0x128, |
| 100 | + NPU_SET_SCALE_REGION = 0x129, |
| 101 | + NPU_SET_DMA0_SRC_REGION = 0x130, |
| 102 | + NPU_SET_DMA0_DST_REGION = 0x131, |
| 103 | + NPU_SET_DMA0_SIZE0 = 0x132, |
| 104 | + NPU_SET_DMA0_SIZE1 = 0x133, |
| 105 | + NPU_SET_IFM2_BROADCAST = 0x180, |
| 106 | + NPU_SET_IFM2_PRECISION = 0x185, |
| 107 | + NPU_SET_IFM2_WIDTH0_M1 = 0x18a, |
| 108 | + NPU_SET_IFM2_HEIGHT0_M1 = 0x18b, |
| 109 | + NPU_SET_IFM2_HEIGHT1_M1 = 0x18c, |
| 110 | + NPU_SET_IFM2_REGION = 0x18f, |
| 111 | + NPU_SET_IFM_BASE0 = 0x4000, /* every code from here on is 0x4000 or above */ |
| 112 | + NPU_SET_IFM_BASE1 = 0x4001, |
| 113 | + NPU_SET_IFM_BASE2 = 0x4002, |
| 114 | + NPU_SET_IFM_BASE3 = 0x4003, |
| 115 | + NPU_SET_IFM_STRIDE_X = 0x4004, |
| 116 | + NPU_SET_IFM_STRIDE_Y = 0x4005, |
| 117 | + NPU_SET_IFM_STRIDE_C = 0x4006, |
| 118 | + NPU_SET_OFM_BASE0 = 0x4010, |
| 119 | + NPU_SET_OFM_BASE1 = 0x4011, |
| 120 | + NPU_SET_OFM_BASE2 = 0x4012, |
| 121 | + NPU_SET_OFM_BASE3 = 0x4013, |
| 122 | + NPU_SET_OFM_STRIDE_X = 0x4014, |
| 123 | + NPU_SET_OFM_STRIDE_Y = 0x4015, |
| 124 | + NPU_SET_OFM_STRIDE_C = 0x4016, |
| 125 | + NPU_SET_WEIGHT_BASE = 0x4020, |
| 126 | + NPU_SET_WEIGHT_LENGTH = 0x4021, |
| 127 | + NPU_SET_SCALE_BASE = 0x4022, |
| 128 | + NPU_SET_SCALE_LENGTH = 0x4023, |
| 129 | + NPU_SET_DMA0_SRC = 0x4030, |
| 130 | + NPU_SET_DMA0_DST = 0x4031, |
| 131 | + NPU_SET_DMA0_LEN = 0x4032, |
| 132 | + NPU_SET_DMA0_SRC_STRIDE0 = 0x4033, |
| 133 | + NPU_SET_DMA0_SRC_STRIDE1 = 0x4034, |
| 134 | + NPU_SET_DMA0_DST_STRIDE0 = 0x4035, |
| 135 | + NPU_SET_DMA0_DST_STRIDE1 = 0x4036, |
| 136 | + NPU_SET_IFM2_BASE0 = 0x4080, |
| 137 | + NPU_SET_IFM2_BASE1 = 0x4081, |
| 138 | + NPU_SET_IFM2_BASE2 = 0x4082, |
| 139 | + NPU_SET_IFM2_BASE3 = 0x4083, |
| 140 | + NPU_SET_IFM2_STRIDE_X = 0x4084, |
| 141 | + NPU_SET_IFM2_STRIDE_Y = 0x4085, |
| 142 | + NPU_SET_IFM2_STRIDE_C = 0x4086, |
| 143 | + NPU_SET_WEIGHT1_BASE = 0x4090, |
| 144 | + NPU_SET_WEIGHT1_LENGTH = 0x4091, |
| 145 | + NPU_SET_SCALE1_BASE = 0x4092, /* same value as NPU_SET_WEIGHT2_BASE */ |
| 146 | + NPU_SET_WEIGHT2_BASE = 0x4092, /* same value as NPU_SET_SCALE1_BASE; presumably per-NPU-generation names - confirm */ |
| 147 | + NPU_SET_SCALE1_LENGTH = 0x4093, /* same value as NPU_SET_WEIGHT2_LENGTH */ |
| 148 | + NPU_SET_WEIGHT2_LENGTH = 0x4093, /* same value as NPU_SET_SCALE1_LENGTH; presumably per-NPU-generation names - confirm */ |
| 149 | + NPU_SET_WEIGHT3_BASE = 0x4094, |
| 150 | + NPU_SET_WEIGHT3_LENGTH = 0x4095, |
| 151 | +}; |
| 152 | + |
| 153 | +#define ETHOSU_SRAM_REGION 2 /* SRAM region number; kept identical to the one the Vela compiler uses */ |
| 154 | + |
| 155 | +/** |
| 156 | + * struct ethosu_device - Ethos-U NPU device state, embedding the &struct drm_device |
| 157 | + */ |
| 158 | +struct ethosu_device { |
| 159 | + /** @base: Base drm_device. */ |
| 160 | + struct drm_device base; |
| 161 | + |
| 162 | + /** @regs: CPU mapping of the registers. */ |
| 163 | + void __iomem *regs; |
| 164 | + |
| 165 | + void __iomem *sram; /* I/O-memory pointer; presumably the CPU mapping of the SRAM - confirm */ |
| 166 | + struct gen_pool *srampool; /* presumably the allocator handing out pieces of the SRAM - confirm */ |
| 167 | + dma_addr_t sramphys; /* presumably the DMA address of the SRAM - confirm */ |
| 168 | + |
| 169 | + struct clk_bulk_data *clks; /* presumably an array of num_clks clock entries - confirm */ |
| 170 | + int num_clks; |
| 171 | + int irq; /* presumably the NPU interrupt number - confirm */ |
| 172 | + |
| 173 | + struct drm_ethosu_npu_info npu_info; /* its id field is decoded by ethosu_is_u65() */ |
| 174 | + |
| 175 | + struct ethosu_job *in_flight_job; /* covered by job_lock */ |
| 176 | + /* job_lock: for in_flight_job and ethosu_job_hw_submit() */ |
| 177 | + struct mutex job_lock; |
| 178 | + |
| 179 | + /* fence_lock: for dma_fence */ |
| 180 | + spinlock_t fence_lock; |
| 181 | + |
| 182 | + struct drm_gpu_scheduler sched; |
| 183 | + /* sched_lock: for ethosu_job_do_push() */ |
| 184 | + struct mutex sched_lock; |
| 185 | + u64 fence_context; /* presumably the dma_fence context of this device - confirm */ |
| 186 | + u64 emit_seqno; /* presumably the last fence sequence number handed out - confirm */ |
| 187 | +}; |
| 188 | + |
| 189 | +/* Map a pointer to the embedded base drm_device back to the enclosing ethosu_device. */ |
| 190 | +#define to_ethosu_device(drm_dev) container_of(drm_dev, struct ethosu_device, base) |
| 191 | +/* True when the ID_ARCH_MAJOR_MASK field of npu_info.id equals 1, which this driver treats as U65. */ |
| 192 | +static inline bool ethosu_is_u65(const struct ethosu_device *ethosudev) |
| 193 | +{ |
| 194 | + return (ethosudev->npu_info.id & ID_ARCH_MAJOR_MASK) == FIELD_PREP(ID_ARCH_MAJOR_MASK, 1); |
| 195 | +} |
| 196 | + |
| 197 | +#endif |
0 commit comments