@@ -410,6 +410,7 @@ int tdx_vm_init(struct kvm *kvm)
410410int tdx_vcpu_create (struct kvm_vcpu * vcpu )
411411{
412412 struct kvm_tdx * kvm_tdx = to_kvm_tdx (vcpu -> kvm );
413+ struct vcpu_tdx * tdx = to_tdx (vcpu );
413414
414415 if (kvm_tdx -> state != TD_STATE_INITIALIZED )
415416 return - EIO ;
@@ -438,12 +439,42 @@ int tdx_vcpu_create(struct kvm_vcpu *vcpu)
438439 if ((kvm_tdx -> xfam & XFEATURE_MASK_XTILE ) == XFEATURE_MASK_XTILE )
439440 vcpu -> arch .xfd_no_write_intercept = true;
440441
442+ tdx -> state = VCPU_TD_STATE_UNINITIALIZED ;
443+
441444 return 0 ;
442445}
443446
444447void tdx_vcpu_free (struct kvm_vcpu * vcpu )
445448{
446- /* This is stub for now. More logic will come. */
449+ struct kvm_tdx * kvm_tdx = to_kvm_tdx (vcpu -> kvm );
450+ struct vcpu_tdx * tdx = to_tdx (vcpu );
451+ int i ;
452+
453+ /*
454+ * It is not possible to reclaim pages while hkid is assigned. It might
455+ * be assigned if:
456+ * 1. the TD VM is being destroyed but freeing hkid failed, in which
457+ * case the pages are leaked
458+ * 2. TD VCPU creation failed and this on the error path, in which case
459+ * there is nothing to do anyway
460+ */
461+ if (is_hkid_assigned (kvm_tdx ))
462+ return ;
463+
464+ if (tdx -> vp .tdcx_pages ) {
465+ for (i = 0 ; i < kvm_tdx -> td .tdcx_nr_pages ; i ++ ) {
466+ if (tdx -> vp .tdcx_pages [i ])
467+ tdx_reclaim_control_page (tdx -> vp .tdcx_pages [i ]);
468+ }
469+ kfree (tdx -> vp .tdcx_pages );
470+ tdx -> vp .tdcx_pages = NULL ;
471+ }
472+ if (tdx -> vp .tdvpr_page ) {
473+ tdx_reclaim_control_page (tdx -> vp .tdvpr_page );
474+ tdx -> vp .tdvpr_page = 0 ;
475+ }
476+
477+ tdx -> state = VCPU_TD_STATE_UNINITIALIZED ;
447478}
448479
449480static int tdx_get_capabilities (struct kvm_tdx_cmd * cmd )
@@ -653,6 +684,8 @@ static int __tdx_td_init(struct kvm *kvm, struct td_params *td_params,
653684 goto free_hkid ;
654685
655686 kvm_tdx -> td .tdcs_nr_pages = tdx_sysinfo -> td_ctrl .tdcs_base_size / PAGE_SIZE ;
687+ /* TDVPS = TDVPR(4K page) + TDCX(multiple 4K pages), -1 for TDVPR. */
688+ kvm_tdx -> td .tdcx_nr_pages = tdx_sysinfo -> td_ctrl .tdvps_base_size / PAGE_SIZE - 1 ;
656689 tdcs_pages = kcalloc (kvm_tdx -> td .tdcs_nr_pages , sizeof (* kvm_tdx -> td .tdcs_pages ),
657690 GFP_KERNEL | __GFP_ZERO );
658691 if (!tdcs_pages )
@@ -930,6 +963,143 @@ int tdx_vm_ioctl(struct kvm *kvm, void __user *argp)
930963 return r ;
931964}
932965
966+ /* VMM can pass one 64bit auxiliary data to vcpu via RCX for guest BIOS. */
967+ static int tdx_td_vcpu_init (struct kvm_vcpu * vcpu , u64 vcpu_rcx )
968+ {
969+ struct kvm_tdx * kvm_tdx = to_kvm_tdx (vcpu -> kvm );
970+ struct vcpu_tdx * tdx = to_tdx (vcpu );
971+ struct page * page ;
972+ int ret , i ;
973+ u64 err ;
974+
975+ page = alloc_page (GFP_KERNEL );
976+ if (!page )
977+ return - ENOMEM ;
978+ tdx -> vp .tdvpr_page = page ;
979+
980+ tdx -> vp .tdcx_pages = kcalloc (kvm_tdx -> td .tdcx_nr_pages , sizeof (* tdx -> vp .tdcx_pages ),
981+ GFP_KERNEL );
982+ if (!tdx -> vp .tdcx_pages ) {
983+ ret = - ENOMEM ;
984+ goto free_tdvpr ;
985+ }
986+
987+ for (i = 0 ; i < kvm_tdx -> td .tdcx_nr_pages ; i ++ ) {
988+ page = alloc_page (GFP_KERNEL );
989+ if (!page ) {
990+ ret = - ENOMEM ;
991+ goto free_tdcx ;
992+ }
993+ tdx -> vp .tdcx_pages [i ] = page ;
994+ }
995+
996+ err = tdh_vp_create (& kvm_tdx -> td , & tdx -> vp );
997+ if (KVM_BUG_ON (err , vcpu -> kvm )) {
998+ ret = - EIO ;
999+ pr_tdx_error (TDH_VP_CREATE , err );
1000+ goto free_tdcx ;
1001+ }
1002+
1003+ for (i = 0 ; i < kvm_tdx -> td .tdcx_nr_pages ; i ++ ) {
1004+ err = tdh_vp_addcx (& tdx -> vp , tdx -> vp .tdcx_pages [i ]);
1005+ if (KVM_BUG_ON (err , vcpu -> kvm )) {
1006+ pr_tdx_error (TDH_VP_ADDCX , err );
1007+ /*
1008+ * Pages already added are reclaimed by the vcpu_free
1009+ * method, but the rest are freed here.
1010+ */
1011+ for (; i < kvm_tdx -> td .tdcx_nr_pages ; i ++ ) {
1012+ __free_page (tdx -> vp .tdcx_pages [i ]);
1013+ tdx -> vp .tdcx_pages [i ] = NULL ;
1014+ }
1015+ return - EIO ;
1016+ }
1017+ }
1018+
1019+ err = tdh_vp_init (& tdx -> vp , vcpu_rcx , vcpu -> vcpu_id );
1020+ if (KVM_BUG_ON (err , vcpu -> kvm )) {
1021+ pr_tdx_error (TDH_VP_INIT , err );
1022+ return - EIO ;
1023+ }
1024+
1025+ vcpu -> arch .mp_state = KVM_MP_STATE_RUNNABLE ;
1026+
1027+ return 0 ;
1028+
1029+ free_tdcx :
1030+ for (i = 0 ; i < kvm_tdx -> td .tdcx_nr_pages ; i ++ ) {
1031+ if (tdx -> vp .tdcx_pages [i ])
1032+ __free_page (tdx -> vp .tdcx_pages [i ]);
1033+ tdx -> vp .tdcx_pages [i ] = NULL ;
1034+ }
1035+ kfree (tdx -> vp .tdcx_pages );
1036+ tdx -> vp .tdcx_pages = NULL ;
1037+
1038+ free_tdvpr :
1039+ if (tdx -> vp .tdvpr_page )
1040+ __free_page (tdx -> vp .tdvpr_page );
1041+ tdx -> vp .tdvpr_page = 0 ;
1042+
1043+ return ret ;
1044+ }
1045+
1046+ static int tdx_vcpu_init (struct kvm_vcpu * vcpu , struct kvm_tdx_cmd * cmd )
1047+ {
1048+ u64 apic_base ;
1049+ struct vcpu_tdx * tdx = to_tdx (vcpu );
1050+ int ret ;
1051+
1052+ if (cmd -> flags )
1053+ return - EINVAL ;
1054+
1055+ if (tdx -> state != VCPU_TD_STATE_UNINITIALIZED )
1056+ return - EINVAL ;
1057+
1058+ /*
1059+ * TDX requires X2APIC, userspace is responsible for configuring guest
1060+ * CPUID accordingly.
1061+ */
1062+ apic_base = APIC_DEFAULT_PHYS_BASE | LAPIC_MODE_X2APIC |
1063+ (kvm_vcpu_is_reset_bsp (vcpu ) ? MSR_IA32_APICBASE_BSP : 0 );
1064+ if (kvm_apic_set_base (vcpu , apic_base , true))
1065+ return - EINVAL ;
1066+
1067+ ret = tdx_td_vcpu_init (vcpu , (u64 )cmd -> data );
1068+ if (ret )
1069+ return ret ;
1070+
1071+ tdx -> state = VCPU_TD_STATE_INITIALIZED ;
1072+
1073+ return 0 ;
1074+ }
1075+
1076+ int tdx_vcpu_ioctl (struct kvm_vcpu * vcpu , void __user * argp )
1077+ {
1078+ struct kvm_tdx * kvm_tdx = to_kvm_tdx (vcpu -> kvm );
1079+ struct kvm_tdx_cmd cmd ;
1080+ int ret ;
1081+
1082+ if (!is_hkid_assigned (kvm_tdx ) || kvm_tdx -> state == TD_STATE_RUNNABLE )
1083+ return - EINVAL ;
1084+
1085+ if (copy_from_user (& cmd , argp , sizeof (cmd )))
1086+ return - EFAULT ;
1087+
1088+ if (cmd .hw_error )
1089+ return - EINVAL ;
1090+
1091+ switch (cmd .id ) {
1092+ case KVM_TDX_INIT_VCPU :
1093+ ret = tdx_vcpu_init (vcpu , & cmd );
1094+ break ;
1095+ default :
1096+ ret = - EINVAL ;
1097+ break ;
1098+ }
1099+
1100+ return ret ;
1101+ }
1102+
9331103static int tdx_online_cpu (unsigned int cpu )
9341104{
9351105 unsigned long flags ;
0 commit comments