Skip to content

Commit 8b65b0b

Browse files
Thomas Richter authored and hca committed
s390/pai_crypto: Merge pai_ext PMU into pai_crypto
Combine PAI cryptography and PAI extension (NNPA) PMUs in one driver. Remove file perf_pai_ext.c and registration of PMU "pai_ext" from perf_pai_crypto.c. Includes: - Shared alloc/free and sched_task handling - NNPA events with exclude_kernel enforced, exclude_user rejected - Setup CR0 bits for both PMUs Signed-off-by: Thomas Richter <tmricht@linux.ibm.com> Reviewed-by: Jan Polensky <japo@linux.ibm.com> Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
1 parent 3abb6b1 commit 8b65b0b

3 files changed

Lines changed: 230 additions & 765 deletions

File tree

arch/s390/kernel/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ obj-$(CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT) += ima_arch.o
7979
obj-$(CONFIG_PERF_EVENTS) += perf_event.o
8080
obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf.o perf_cpum_sf.o
8181
obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_events.o perf_regs.o
82-
obj-$(CONFIG_PERF_EVENTS) += perf_pai_crypto.o perf_pai_ext.o
82+
obj-$(CONFIG_PERF_EVENTS) += perf_pai_crypto.o
8383

8484
obj-$(CONFIG_TRACEPOINTS) += trace.o
8585

arch/s390/kernel/perf_pai_crypto.c

Lines changed: 229 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -25,21 +25,41 @@ DEFINE_STATIC_KEY_FALSE(pai_key);
2525

2626
enum {
2727
PAI_PMU_CRYPTO, /* Index of PMU pai_crypto */
28+
PAI_PMU_EXT, /* Index of PMU pai_ext */
2829
PAI_PMU_MAX /* # of PAI PMUs */
2930
};
3031

32+
enum {
33+
PAIE1_CB_SZ = 0x200, /* Size of PAIE1 control block */
34+
PAIE1_CTRBLOCK_SZ = 0x400 /* Size of PAIE1 counter blocks */
35+
};
36+
3137
struct pai_userdata {
3238
u16 num;
3339
u64 value;
3440
} __packed;
3541

42+
/* Create the PAI extension 1 control block area.
43+
* The PAI extension control block 1 is pointed to by lowcore
44+
* address 0x1508 for each CPU. This control block is 512 bytes in size
45+
* and requires a 512 byte boundary alignment.
46+
*/
47+
struct paiext_cb { /* PAI extension 1 control block */
48+
u64 header; /* Not used */
49+
u64 reserved1;
50+
u64 acc; /* Addr to analytics counter control block */
51+
u8 reserved2[488];
52+
} __packed;
53+
3654
struct pai_map {
3755
unsigned long *area; /* Area for CPU to store counters */
3856
struct pai_userdata *save; /* Page to store no-zero counters */
3957
unsigned int active_events; /* # of PAI crypto users */
4058
refcount_t refcnt; /* Reference count mapped buffers */
4159
struct perf_event *event; /* Perf event for sampling */
4260
struct list_head syswide_list; /* List system-wide sampling events */
61+
struct paiext_cb *paiext_cb; /* PAI extension control block area */
62+
bool fullpage; /* True: counter area is a full page */
4363
};
4464

4565
struct pai_mapptr {
@@ -108,7 +128,11 @@ static DEFINE_MUTEX(pai_reserve_mutex);
108128
/* Free all memory allocated for event counting/sampling setup */
109129
static void pai_free(struct pai_mapptr *mp)
110130
{
111-
free_page((unsigned long)mp->mapptr->area);
131+
if (mp->mapptr->fullpage)
132+
free_page((unsigned long)mp->mapptr->area);
133+
else
134+
kfree(mp->mapptr->area);
135+
kfree(mp->mapptr->paiext_cb);
112136
kvfree(mp->mapptr->save);
113137
kfree(mp->mapptr);
114138
mp->mapptr = NULL;
@@ -215,6 +239,7 @@ static int pai_alloc_cpu(struct perf_event *event, int cpu)
215239
{
216240
int rc, idx = PAI_PMU_IDX(event);
217241
struct pai_map *cpump = NULL;
242+
bool need_paiext_cb = false;
218243
struct pai_mapptr *mp;
219244

220245
mutex_lock(&pai_reserve_mutex);
@@ -235,11 +260,33 @@ static int pai_alloc_cpu(struct perf_event *event, int cpu)
235260
* Only the first counting event has to allocate a page.
236261
*/
237262
mp->mapptr = cpump;
238-
cpump->area = (unsigned long *)get_zeroed_page(GFP_KERNEL);
263+
if (idx == PAI_PMU_CRYPTO) {
264+
cpump->area = (unsigned long *)get_zeroed_page(GFP_KERNEL);
265+
/* free_page() can handle 0x0 address */
266+
cpump->fullpage = true;
267+
} else { /* PAI_PMU_EXT */
268+
/*
269+
* Allocate memory for counter area and counter extraction.
270+
* These are
271+
* - a 512 byte block and requires 512 byte boundary
272+
* alignment.
273+
* - a 1KB block and requires 1KB boundary
274+
* alignment.
275+
* Only the first counting event has to allocate the area.
276+
*
277+
* Note: This works with commit 59bb47985c1d by default.
278+
* Backporting this to kernels without this commit might
279+
* need adjustment.
280+
*/
281+
cpump->area = kzalloc(pai_pmu[idx].area_size, GFP_KERNEL);
282+
cpump->paiext_cb = kzalloc(PAIE1_CB_SZ, GFP_KERNEL);
283+
need_paiext_cb = true;
284+
}
239285
cpump->save = kvmalloc_array(pai_pmu[idx].num_avail + 1,
240286
sizeof(struct pai_userdata),
241287
GFP_KERNEL);
242-
if (!cpump->area || !cpump->save) {
288+
if (!cpump->area || !cpump->save ||
289+
(need_paiext_cb && !cpump->paiext_cb)) {
243290
pai_free(mp);
244291
goto undo;
245292
}
@@ -314,6 +361,8 @@ static int pai_event_valid(struct perf_event *event, int idx)
314361
/* PAI crypto event must be in valid range, try others if not */
315362
if (a->config < pp->base || a->config > pp->base + pp->num_avail)
316363
return -ENOENT;
364+
if (idx == PAI_PMU_EXT && a->exclude_user)
365+
return -EINVAL;
317366
PAI_PMU_IDX(event) = idx;
318367
return 0;
319368
}
@@ -422,12 +471,21 @@ static int pai_add(struct perf_event *event, int flags)
422471
int idx = PAI_PMU_IDX(event);
423472
struct pai_mapptr *mp = this_cpu_ptr(pai_root[idx].mapptr);
424473
struct pai_map *cpump = mp->mapptr;
474+
struct paiext_cb *pcb = cpump->paiext_cb;
425475
unsigned long ccd;
426476

427477
if (++cpump->active_events == 1) {
428-
ccd = virt_to_phys(cpump->area) | PAI_CRYPTO_KERNEL_OFFSET;
429-
WRITE_ONCE(get_lowcore()->ccd, ccd);
430-
local_ctl_set_bit(0, CR0_CRYPTOGRAPHY_COUNTER_BIT);
478+
if (!pcb) { /* PAI crypto */
479+
ccd = virt_to_phys(cpump->area) | PAI_CRYPTO_KERNEL_OFFSET;
480+
WRITE_ONCE(get_lowcore()->ccd, ccd);
481+
local_ctl_set_bit(0, CR0_CRYPTOGRAPHY_COUNTER_BIT);
482+
} else { /* PAI extension 1 */
483+
ccd = virt_to_phys(pcb);
484+
WRITE_ONCE(get_lowcore()->aicd, ccd);
485+
pcb->acc = virt_to_phys(cpump->area) | 0x1;
486+
/* Enable CPU instruction lookup for PAIE1 control block */
487+
local_ctl_set_bit(0, CR0_PAI_EXTENSION_BIT);
488+
}
431489
}
432490
if (flags & PERF_EF_START)
433491
pai_pmu[idx].pmu->start(event, PERF_EF_RELOAD);
@@ -471,11 +529,19 @@ static void pai_del(struct perf_event *event, int flags)
471529
int idx = PAI_PMU_IDX(event);
472530
struct pai_mapptr *mp = this_cpu_ptr(pai_root[idx].mapptr);
473531
struct pai_map *cpump = mp->mapptr;
532+
struct paiext_cb *pcb = cpump->paiext_cb;
474533

475534
pai_pmu[idx].pmu->stop(event, PERF_EF_UPDATE);
476535
if (--cpump->active_events == 0) {
477-
local_ctl_clear_bit(0, CR0_CRYPTOGRAPHY_COUNTER_BIT);
478-
WRITE_ONCE(get_lowcore()->ccd, 0);
536+
if (!pcb) { /* PAI crypto */
537+
local_ctl_clear_bit(0, CR0_CRYPTOGRAPHY_COUNTER_BIT);
538+
WRITE_ONCE(get_lowcore()->ccd, 0);
539+
} else { /* PAI extension 1 */
540+
/* Disable CPU instruction lookup for PAIE1 control block */
541+
local_ctl_clear_bit(0, CR0_PAI_EXTENSION_BIT);
542+
pcb->acc = 0;
543+
WRITE_ONCE(get_lowcore()->aicd, 0);
544+
}
479545
}
480546
}
481547

@@ -616,6 +682,70 @@ static void paicrypt_sched_task(struct perf_event_pmu_context *pmu_ctx,
616682
pai_have_samples(PAI_PMU_CRYPTO);
617683
}
618684

685+
/* ============================= paiext ====================================*/
686+
687+
static void paiext_event_destroy(struct perf_event *event)
688+
{
689+
pai_event_destroy(event);
690+
}
691+
692+
/* Might be called on different CPU than the one the event is intended for. */
693+
static int paiext_event_init(struct perf_event *event)
694+
{
695+
int rc = pai_event_init(event, PAI_PMU_EXT);
696+
697+
if (!rc) {
698+
event->attr.exclude_kernel = true; /* No kernel space part */
699+
event->destroy = paiext_event_destroy;
700+
/* Offset of NNPA in paiext_cb */
701+
event->hw.config_base = offsetof(struct paiext_cb, acc);
702+
}
703+
return rc;
704+
}
705+
706+
static u64 paiext_getall(struct perf_event *event)
707+
{
708+
return pai_getdata(event, false);
709+
}
710+
711+
static void paiext_read(struct perf_event *event)
712+
{
713+
pai_read(event, paiext_getall);
714+
}
715+
716+
static void paiext_start(struct perf_event *event, int flags)
717+
{
718+
pai_start(event, flags, paiext_getall);
719+
}
720+
721+
static int paiext_add(struct perf_event *event, int flags)
722+
{
723+
return pai_add(event, flags);
724+
}
725+
726+
static void paiext_stop(struct perf_event *event, int flags)
727+
{
728+
pai_stop(event, flags);
729+
}
730+
731+
static void paiext_del(struct perf_event *event, int flags)
732+
{
733+
pai_del(event, flags);
734+
}
735+
736+
/* Called on schedule-in and schedule-out. No access to event structure,
737+
* but for sampling only event NNPA_ALL is allowed.
738+
*/
739+
static void paiext_sched_task(struct perf_event_pmu_context *pmu_ctx,
740+
struct task_struct *task, bool sched_in)
741+
{
742+
/* We started with a clean page on event installation. So read out
743+
* results on schedule_out and if page was dirty, save old values.
744+
*/
745+
if (!sched_in)
746+
pai_have_samples(PAI_PMU_EXT);
747+
}
748+
619749
/* Attribute definitions for paicrypt interface. As with other CPU
620750
* Measurement Facilities, there is one attribute per mapped counter.
621751
* The number of mapped counters may vary per machine generation. Use
@@ -845,6 +975,81 @@ static const char * const paicrypt_ctrnames[] = {
845975
[172] = "PCKMO_ENCRYPT_AES_XTS_256",
846976
};
847977

978+
static struct attribute *paiext_format_attr[] = {
979+
&format_attr_event.attr,
980+
NULL,
981+
};
982+
983+
static struct attribute_group paiext_events_group = {
984+
.name = "events",
985+
.attrs = NULL, /* Filled in attr_event_init() */
986+
};
987+
988+
static struct attribute_group paiext_format_group = {
989+
.name = "format",
990+
.attrs = paiext_format_attr,
991+
};
992+
993+
static const struct attribute_group *paiext_attr_groups[] = {
994+
&paiext_events_group,
995+
&paiext_format_group,
996+
NULL,
997+
};
998+
999+
/* Performance monitoring unit for mapped counters */
1000+
static struct pmu paiext = {
1001+
.task_ctx_nr = perf_hw_context,
1002+
.event_init = paiext_event_init,
1003+
.add = paiext_add,
1004+
.del = paiext_del,
1005+
.start = paiext_start,
1006+
.stop = paiext_stop,
1007+
.read = paiext_read,
1008+
.sched_task = paiext_sched_task,
1009+
.attr_groups = paiext_attr_groups,
1010+
};
1011+
1012+
/* List of symbolic PAI extension 1 NNPA counter names. */
1013+
static const char * const paiext_ctrnames[] = {
1014+
[0] = "NNPA_ALL",
1015+
[1] = "NNPA_ADD",
1016+
[2] = "NNPA_SUB",
1017+
[3] = "NNPA_MUL",
1018+
[4] = "NNPA_DIV",
1019+
[5] = "NNPA_MIN",
1020+
[6] = "NNPA_MAX",
1021+
[7] = "NNPA_LOG",
1022+
[8] = "NNPA_EXP",
1023+
[9] = "NNPA_IBM_RESERVED_9",
1024+
[10] = "NNPA_RELU",
1025+
[11] = "NNPA_TANH",
1026+
[12] = "NNPA_SIGMOID",
1027+
[13] = "NNPA_SOFTMAX",
1028+
[14] = "NNPA_BATCHNORM",
1029+
[15] = "NNPA_MAXPOOL2D",
1030+
[16] = "NNPA_AVGPOOL2D",
1031+
[17] = "NNPA_LSTMACT",
1032+
[18] = "NNPA_GRUACT",
1033+
[19] = "NNPA_CONVOLUTION",
1034+
[20] = "NNPA_MATMUL_OP",
1035+
[21] = "NNPA_MATMUL_OP_BCAST23",
1036+
[22] = "NNPA_SMALLBATCH",
1037+
[23] = "NNPA_LARGEDIM",
1038+
[24] = "NNPA_SMALLTENSOR",
1039+
[25] = "NNPA_1MFRAME",
1040+
[26] = "NNPA_2GFRAME",
1041+
[27] = "NNPA_ACCESSEXCEPT",
1042+
[28] = "NNPA_TRANSFORM",
1043+
[29] = "NNPA_GELU",
1044+
[30] = "NNPA_MOMENTS",
1045+
[31] = "NNPA_LAYERNORM",
1046+
[32] = "NNPA_MATMUL_OP_BCAST1",
1047+
[33] = "NNPA_SQRT",
1048+
[34] = "NNPA_INVSQRT",
1049+
[35] = "NNPA_NORM",
1050+
[36] = "NNPA_REDUCE",
1051+
};
1052+
8481053
static void __init attr_event_free(struct attribute **attrs)
8491054
{
8501055
struct perf_pmu_events_attr *pa;
@@ -946,6 +1151,19 @@ static struct pai_pmu pai_pmu[] __refdata = {
9461151
.exit = pai_pmu_exit,
9471152
.pmu = &paicrypt,
9481153
.event_group = &paicrypt_events_group
1154+
},
1155+
[PAI_PMU_EXT] = {
1156+
.pmuname = "pai_ext",
1157+
.facility_nr = 197,
1158+
.num_named = ARRAY_SIZE(paiext_ctrnames),
1159+
.names = paiext_ctrnames,
1160+
.base = PAI_NNPA_BASE,
1161+
.kernel_offset = 0,
1162+
.area_size = PAIE1_CTRBLOCK_SZ,
1163+
.init = pai_pmu_init,
1164+
.exit = pai_pmu_exit,
1165+
.pmu = &paiext,
1166+
.event_group = &paiext_events_group
9491167
}
9501168
};
9511169

@@ -977,6 +1195,9 @@ static int __init paipmu_setup(void)
9771195
continue;
9781196
}
9791197
break;
1198+
case PAI_PMU_EXT:
1199+
p->num_avail = ib.num_nnpa;
1200+
break;
9801201
}
9811202
p->num_avail += 1; /* Add xxx_ALL event */
9821203
if (p->init) {

0 commit comments

Comments (0)