@@ -25,21 +25,41 @@ DEFINE_STATIC_KEY_FALSE(pai_key);
2525
/* Indices of the supported PAI PMUs; used to index the per-PMU tables. */
enum {
	PAI_PMU_CRYPTO,		/* Index of PMU pai_crypto */
	PAI_PMU_EXT,		/* Index of PMU pai_ext */
	PAI_PMU_MAX		/* # of PAI PMUs */
};
3031
/* Sizes in bytes of the PAI extension 1 (PAIE1) memory areas. */
enum {
	PAIE1_CB_SZ = 0x200,		/* Size of PAIE1 control block */
	PAIE1_CTRBLOCK_SZ = 0x400	/* Size of PAIE1 counter blocks */
};
36+
3137struct pai_userdata {
3238 u16 num ;
3339 u64 value ;
3440} __packed ;
3541
42+ /* Create the PAI extension 1 control block area.
43+ * The PAI extension control block 1 is pointed to by lowcore
44+ * address 0x1508 for each CPU. This control block is 512 bytes in size
45+ * and requires a 512 byte boundary alignment.
46+ */
47+ struct paiext_cb { /* PAI extension 1 control block */
48+ u64 header ; /* Not used */
49+ u64 reserved1 ;
50+ u64 acc ; /* Addr to analytics counter control block */
51+ u8 reserved2 [488 ];
52+ } __packed ;
53+
3654struct pai_map {
3755 unsigned long * area ; /* Area for CPU to store counters */
3856 struct pai_userdata * save ; /* Page to store no-zero counters */
3957 unsigned int active_events ; /* # of PAI crypto users */
4058 refcount_t refcnt ; /* Reference count mapped buffers */
4159 struct perf_event * event ; /* Perf event for sampling */
4260 struct list_head syswide_list ; /* List system-wide sampling events */
61+ struct paiext_cb * paiext_cb ; /* PAI extension control block area */
62+ bool fullpage ; /* True: counter area is a full page */
4363};
4464
4565struct pai_mapptr {
@@ -108,7 +128,11 @@ static DEFINE_MUTEX(pai_reserve_mutex);
108128/* Free all memory allocated for event counting/sampling setup */
109129static void pai_free (struct pai_mapptr * mp )
110130{
111- free_page ((unsigned long )mp -> mapptr -> area );
131+ if (mp -> mapptr -> fullpage )
132+ free_page ((unsigned long )mp -> mapptr -> area );
133+ else
134+ kfree (mp -> mapptr -> area );
135+ kfree (mp -> mapptr -> paiext_cb );
112136 kvfree (mp -> mapptr -> save );
113137 kfree (mp -> mapptr );
114138 mp -> mapptr = NULL ;
@@ -215,6 +239,7 @@ static int pai_alloc_cpu(struct perf_event *event, int cpu)
215239{
216240 int rc , idx = PAI_PMU_IDX (event );
217241 struct pai_map * cpump = NULL ;
242+ bool need_paiext_cb = false;
218243 struct pai_mapptr * mp ;
219244
220245 mutex_lock (& pai_reserve_mutex );
@@ -235,11 +260,33 @@ static int pai_alloc_cpu(struct perf_event *event, int cpu)
235260 * Only the first counting event has to allocate a page.
236261 */
237262 mp -> mapptr = cpump ;
238- cpump -> area = (unsigned long * )get_zeroed_page (GFP_KERNEL );
263+ if (idx == PAI_PMU_CRYPTO ) {
264+ cpump -> area = (unsigned long * )get_zeroed_page (GFP_KERNEL );
265+ /* free_page() can handle 0x0 address */
266+ cpump -> fullpage = true;
267+ } else { /* PAI_PMU_EXT */
268+ /*
269+ * Allocate memory for counter area and counter extraction.
270+ * These are
271+ * - a 512 byte block and requires 512 byte boundary
272+ * alignment.
273+ * - a 1KB block and requires 1KB boundary
274+ * alignment.
275+ * Only the first counting event has to allocate the area.
276+ *
277+ * Note: This works with commit 59bb47985c1d by default.
278+ * Backporting this to kernels without this commit might
279+ * need adjustment.
280+ */
281+ cpump -> area = kzalloc (pai_pmu [idx ].area_size , GFP_KERNEL );
282+ cpump -> paiext_cb = kzalloc (PAIE1_CB_SZ , GFP_KERNEL );
283+ need_paiext_cb = true;
284+ }
239285 cpump -> save = kvmalloc_array (pai_pmu [idx ].num_avail + 1 ,
240286 sizeof (struct pai_userdata ),
241287 GFP_KERNEL );
242- if (!cpump -> area || !cpump -> save ) {
288+ if (!cpump -> area || !cpump -> save ||
289+ (need_paiext_cb && !cpump -> paiext_cb )) {
243290 pai_free (mp );
244291 goto undo ;
245292 }
@@ -314,6 +361,8 @@ static int pai_event_valid(struct perf_event *event, int idx)
314361 /* PAI crypto event must be in valid range, try others if not */
315362 if (a -> config < pp -> base || a -> config > pp -> base + pp -> num_avail )
316363 return - ENOENT ;
364+ if (idx == PAI_PMU_EXT && a -> exclude_user )
365+ return - EINVAL ;
317366 PAI_PMU_IDX (event ) = idx ;
318367 return 0 ;
319368}
@@ -422,12 +471,21 @@ static int pai_add(struct perf_event *event, int flags)
422471 int idx = PAI_PMU_IDX (event );
423472 struct pai_mapptr * mp = this_cpu_ptr (pai_root [idx ].mapptr );
424473 struct pai_map * cpump = mp -> mapptr ;
474+ struct paiext_cb * pcb = cpump -> paiext_cb ;
425475 unsigned long ccd ;
426476
427477 if (++ cpump -> active_events == 1 ) {
428- ccd = virt_to_phys (cpump -> area ) | PAI_CRYPTO_KERNEL_OFFSET ;
429- WRITE_ONCE (get_lowcore ()-> ccd , ccd );
430- local_ctl_set_bit (0 , CR0_CRYPTOGRAPHY_COUNTER_BIT );
478+ if (!pcb ) { /* PAI crypto */
479+ ccd = virt_to_phys (cpump -> area ) | PAI_CRYPTO_KERNEL_OFFSET ;
480+ WRITE_ONCE (get_lowcore ()-> ccd , ccd );
481+ local_ctl_set_bit (0 , CR0_CRYPTOGRAPHY_COUNTER_BIT );
482+ } else { /* PAI extension 1 */
483+ ccd = virt_to_phys (pcb );
484+ WRITE_ONCE (get_lowcore ()-> aicd , ccd );
485+ pcb -> acc = virt_to_phys (cpump -> area ) | 0x1 ;
486+ /* Enable CPU instruction lookup for PAIE1 control block */
487+ local_ctl_set_bit (0 , CR0_PAI_EXTENSION_BIT );
488+ }
431489 }
432490 if (flags & PERF_EF_START )
433491 pai_pmu [idx ].pmu -> start (event , PERF_EF_RELOAD );
@@ -471,11 +529,19 @@ static void pai_del(struct perf_event *event, int flags)
471529 int idx = PAI_PMU_IDX (event );
472530 struct pai_mapptr * mp = this_cpu_ptr (pai_root [idx ].mapptr );
473531 struct pai_map * cpump = mp -> mapptr ;
532+ struct paiext_cb * pcb = cpump -> paiext_cb ;
474533
475534 pai_pmu [idx ].pmu -> stop (event , PERF_EF_UPDATE );
476535 if (-- cpump -> active_events == 0 ) {
477- local_ctl_clear_bit (0 , CR0_CRYPTOGRAPHY_COUNTER_BIT );
478- WRITE_ONCE (get_lowcore ()-> ccd , 0 );
536+ if (!pcb ) { /* PAI crypto */
537+ local_ctl_clear_bit (0 , CR0_CRYPTOGRAPHY_COUNTER_BIT );
538+ WRITE_ONCE (get_lowcore ()-> ccd , 0 );
539+ } else { /* PAI extension 1 */
540+ /* Disable CPU instruction lookup for PAIE1 control block */
541+ local_ctl_clear_bit (0 , CR0_PAI_EXTENSION_BIT );
542+ pcb -> acc = 0 ;
543+ WRITE_ONCE (get_lowcore ()-> aicd , 0 );
544+ }
479545 }
480546}
481547
@@ -616,6 +682,70 @@ static void paicrypt_sched_task(struct perf_event_pmu_context *pmu_ctx,
616682 pai_have_samples (PAI_PMU_CRYPTO );
617683}
618684
685+ /* ============================= paiext ====================================*/
686+
/* Destroy callback installed for pai_ext events; delegates to the
 * common pai_event_destroy().
 */
static void paiext_event_destroy(struct perf_event *event)
{
	pai_event_destroy(event);
}
691+
692+ /* Might be called on different CPU than the one the event is intended for. */
693+ static int paiext_event_init (struct perf_event * event )
694+ {
695+ int rc = pai_event_init (event , PAI_PMU_EXT );
696+
697+ if (!rc ) {
698+ event -> attr .exclude_kernel = true; /* No kernel space part */
699+ event -> destroy = paiext_event_destroy ;
700+ /* Offset of NNPA in paiext_cb */
701+ event -> hw .config_base = offsetof(struct paiext_cb , acc );
702+ }
703+ return rc ;
704+ }
705+
706+ static u64 paiext_getall (struct perf_event * event )
707+ {
708+ return pai_getdata (event , false);
709+ }
710+
711+ static void paiext_read (struct perf_event * event )
712+ {
713+ pai_read (event , paiext_getall );
714+ }
715+
716+ static void paiext_start (struct perf_event * event , int flags )
717+ {
718+ pai_start (event , flags , paiext_getall );
719+ }
720+
/* PMU add callback: delegates to the common pai_add() and returns its result. */
static int paiext_add(struct perf_event *event, int flags)
{
	return pai_add(event, flags);
}
725+
/* PMU stop callback: delegates to the common pai_stop(). */
static void paiext_stop(struct perf_event *event, int flags)
{
	pai_stop(event, flags);
}
730+
/* PMU del callback: delegates to the common pai_del(). */
static void paiext_del(struct perf_event *event, int flags)
{
	pai_del(event, flags);
}
735+
736+ /* Called on schedule-in and schedule-out. No access to event structure,
737+ * but for sampling only event NNPA_ALL is allowed.
738+ */
739+ static void paiext_sched_task (struct perf_event_pmu_context * pmu_ctx ,
740+ struct task_struct * task , bool sched_in )
741+ {
742+ /* We started with a clean page on event installation. So read out
743+ * results on schedule_out and if page was dirty, save old values.
744+ */
745+ if (!sched_in )
746+ pai_have_samples (PAI_PMU_EXT );
747+ }
748+
619749/* Attribute definitions for paicrypt interface. As with other CPU
620750 * Measurement Facilities, there is one attribute per mapped counter.
621751 * The number of mapped counters may vary per machine generation. Use
@@ -845,6 +975,81 @@ static const char * const paicrypt_ctrnames[] = {
845975 [172 ] = "PCKMO_ENCRYPT_AES_XTS_256" ,
846976};
847977
978+ static struct attribute * paiext_format_attr [] = {
979+ & format_attr_event .attr ,
980+ NULL ,
981+ };
982+
983+ static struct attribute_group paiext_events_group = {
984+ .name = "events" ,
985+ .attrs = NULL , /* Filled in attr_event_init() */
986+ };
987+
988+ static struct attribute_group paiext_format_group = {
989+ .name = "format" ,
990+ .attrs = paiext_format_attr ,
991+ };
992+
993+ static const struct attribute_group * paiext_attr_groups [] = {
994+ & paiext_events_group ,
995+ & paiext_format_group ,
996+ NULL ,
997+ };
998+
999+ /* Performance monitoring unit for mapped counters */
1000+ static struct pmu paiext = {
1001+ .task_ctx_nr = perf_hw_context ,
1002+ .event_init = paiext_event_init ,
1003+ .add = paiext_add ,
1004+ .del = paiext_del ,
1005+ .start = paiext_start ,
1006+ .stop = paiext_stop ,
1007+ .read = paiext_read ,
1008+ .sched_task = paiext_sched_task ,
1009+ .attr_groups = paiext_attr_groups ,
1010+ };
1011+
/* List of symbolic PAI extension 1 NNPA counter names.
 * The array index is the counter number; index 0 is the NNPA_ALL event.
 */
static const char * const paiext_ctrnames[] = {
	[0] = "NNPA_ALL",
	[1] = "NNPA_ADD",
	[2] = "NNPA_SUB",
	[3] = "NNPA_MUL",
	[4] = "NNPA_DIV",
	[5] = "NNPA_MIN",
	[6] = "NNPA_MAX",
	[7] = "NNPA_LOG",
	[8] = "NNPA_EXP",
	[9] = "NNPA_IBM_RESERVED_9",
	[10] = "NNPA_RELU",
	[11] = "NNPA_TANH",
	[12] = "NNPA_SIGMOID",
	[13] = "NNPA_SOFTMAX",
	[14] = "NNPA_BATCHNORM",
	[15] = "NNPA_MAXPOOL2D",
	[16] = "NNPA_AVGPOOL2D",
	[17] = "NNPA_LSTMACT",
	[18] = "NNPA_GRUACT",
	[19] = "NNPA_CONVOLUTION",
	[20] = "NNPA_MATMUL_OP",
	[21] = "NNPA_MATMUL_OP_BCAST23",
	[22] = "NNPA_SMALLBATCH",
	[23] = "NNPA_LARGEDIM",
	[24] = "NNPA_SMALLTENSOR",
	[25] = "NNPA_1MFRAME",
	[26] = "NNPA_2GFRAME",
	[27] = "NNPA_ACCESSEXCEPT",
	[28] = "NNPA_TRANSFORM",
	[29] = "NNPA_GELU",
	[30] = "NNPA_MOMENTS",
	[31] = "NNPA_LAYERNORM",
	[32] = "NNPA_MATMUL_OP_BCAST1",
	[33] = "NNPA_SQRT",
	[34] = "NNPA_INVSQRT",
	[35] = "NNPA_NORM",
	[36] = "NNPA_REDUCE",
};
1052+
8481053static void __init attr_event_free (struct attribute * * attrs )
8491054{
8501055 struct perf_pmu_events_attr * pa ;
@@ -946,6 +1151,19 @@ static struct pai_pmu pai_pmu[] __refdata = {
9461151 .exit = pai_pmu_exit ,
9471152 .pmu = & paicrypt ,
9481153 .event_group = & paicrypt_events_group
1154+ },
1155+ [PAI_PMU_EXT ] = {
1156+ .pmuname = "pai_ext" ,
1157+ .facility_nr = 197 ,
1158+ .num_named = ARRAY_SIZE (paiext_ctrnames ),
1159+ .names = paiext_ctrnames ,
1160+ .base = PAI_NNPA_BASE ,
1161+ .kernel_offset = 0 ,
1162+ .area_size = PAIE1_CTRBLOCK_SZ ,
1163+ .init = pai_pmu_init ,
1164+ .exit = pai_pmu_exit ,
1165+ .pmu = & paiext ,
1166+ .event_group = & paiext_events_group
9491167 }
9501168};
9511169
@@ -977,6 +1195,9 @@ static int __init paipmu_setup(void)
9771195 continue ;
9781196 }
9791197 break ;
1198+ case PAI_PMU_EXT :
1199+ p -> num_avail = ib .num_nnpa ;
1200+ break ;
9801201 }
9811202 p -> num_avail += 1 ; /* Add xxx_ALL event */
9821203 if (p -> init ) {
0 commit comments