@@ -1635,6 +1635,16 @@ static void DisableRemainingVblankSemControls(
16351635static void FreeDeviceReference (struct NvKmsPerOpen * pOpen ,
16361636 struct NvKmsPerOpenDev * pOpenDev )
16371637{
1638+ /*
1639+ * If pDevEvo is NULL, the device was already freed due to GPU loss
1640+ * (surprise removal). In this case, skip all hardware-related cleanup
1641+ * and just free the software structures.
1642+ */
1643+ if (pOpenDev -> pDevEvo == NULL ) {
1644+ nvFreePerOpenDev (pOpen , pOpenDev );
1645+ return ;
1646+ }
1647+
16381648 /* Disable all client-owned vblank sync objects that still exist. */
16391649 DisableRemainingVblankSyncObjects (pOpen , pOpenDev );
16401650
@@ -5327,6 +5337,31 @@ void nvRevokeDevice(NVDevEvoPtr pDevEvo)
53275337 }
53285338}
53295339
5340+ /*
5341+ * Invalidate all pOpenDev references to a device.
5342+ * Called when GPU is lost to ensure nvKmsClose doesn't access freed pDevEvo.
5343+ * This sets pOpenDev->pDevEvo to NULL for all open handles.
5344+ */
5345+ void nvInvalidateDeviceReferences (NVDevEvoPtr pDevEvo )
5346+ {
5347+ struct NvKmsPerOpen * pOpen ;
5348+ struct NvKmsPerOpenDev * pOpenDev ;
5349+ NvKmsGenericHandle dev ;
5350+
5351+ if (pDevEvo == NULL ) {
5352+ return ;
5353+ }
5354+
5355+ nvListForEachEntry (pOpen , & perOpenIoctlList , perOpenIoctlListEntry ) {
5356+ FOR_ALL_POINTERS_IN_EVO_API_HANDLES (& pOpen -> ioctl .devHandles ,
5357+ pOpenDev , dev ) {
5358+ if (pOpenDev -> pDevEvo == pDevEvo ) {
5359+ pOpenDev -> pDevEvo = NULL ;
5360+ }
5361+ }
5362+ }
5363+ }
5364+
53305365/*!
53315366 * Open callback.
53325367 *
@@ -6318,44 +6353,6 @@ static void FreeGlobalState(void)
63186353 nvClearDpyOverrides ();
63196354}
63206355
6321- /*
6322- * Reinitialize the global RM client after a GPU surprise removal.
6323- * When a GPU is removed, the RM client handle may become invalid.
6324- * This function re-creates the client handle so that newly attached
6325- * GPUs can be used.
6326- */
6327- void nvKmsReinitializeGlobalClient (void )
6328- {
6329- NvU32 ret ;
6330-
6331- /*
6332- * First, try to free the old client handle. This may fail if RM
6333- * already invalidated it, but that's OK.
6334- */
6335- if (nvEvoGlobal .clientHandle != 0 ) {
6336- nvRmApiFree (nvEvoGlobal .clientHandle , nvEvoGlobal .clientHandle ,
6337- nvEvoGlobal .clientHandle );
6338- nvEvoGlobal .clientHandle = 0 ;
6339- }
6340-
6341- /* Allocate a new root client */
6342- ret = nvRmApiAlloc (NV01_NULL_OBJECT ,
6343- NV01_NULL_OBJECT ,
6344- NV01_NULL_OBJECT ,
6345- NV01_ROOT ,
6346- & nvEvoGlobal .clientHandle );
6347-
6348- if (ret != NVOS_STATUS_SUCCESS ) {
6349- nvEvoLog (EVO_LOG_ERROR , "Failed to reinitialize client after GPU removal" );
6350- return ;
6351- }
6352-
6353- /* Update the RM context */
6354- nvEvoGlobal .rmSmgContext .clientHandle = nvEvoGlobal .clientHandle ;
6355-
6356- nvEvoLog (EVO_LOG_INFO , "Reinitialized global client after GPU surprise removal" );
6357- }
6358-
63596356/*
63606357 * Wrappers to help SMG access NvKmsKAPI's RM context.
63616358 */
0 commit comments