From 02730c04bc139f64aeedd3bb3896f104d2445499 Mon Sep 17 00:00:00 2001 From: LwhJesse <256257451+LwhJesse@users.noreply.github.com> Date: Thu, 30 Apr 2026 16:58:43 +0800 Subject: [PATCH 1/3] Reduce redundant CUDA Jacobian uploads --- Common/src/linear_algebra/CSysMatrixGPU.cu | 1 - Common/src/linear_algebra/CSysSolve.cpp | 6 ++++++ SU2_CFD/include/integration/CNewtonIntegration.hpp | 3 +++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/Common/src/linear_algebra/CSysMatrixGPU.cu b/Common/src/linear_algebra/CSysMatrixGPU.cu index 90389264ed3..7e0c81ca54f 100644 --- a/Common/src/linear_algebra/CSysMatrixGPU.cu +++ b/Common/src/linear_algebra/CSysMatrixGPU.cu @@ -70,7 +70,6 @@ void CSysMatrix::GPUMatrixVectorProduct(const CSysVector ScalarType* d_vec = vec.GetDevicePointer(); ScalarType* d_prod = prod.GetDevicePointer(); - HtDTransfer(); vec.HtDTransfer(); prod.GPUSetVal(0.0); diff --git a/Common/src/linear_algebra/CSysSolve.cpp b/Common/src/linear_algebra/CSysSolve.cpp index 953bed0ef2a..ad57bfb179b 100644 --- a/Common/src/linear_algebra/CSysSolve.cpp +++ b/Common/src/linear_algebra/CSysSolve.cpp @@ -1464,6 +1464,9 @@ unsigned long CSysSolve::Solve(CSysMatrix& Jacobian, con HandleTemporariesIn(LinSysRes, LinSysSol); +#ifdef HAVE_CUDA + if (config->GetCUDA()) Jacobian.HtDTransfer(); +#endif auto mat_vec = CSysMatrixVectorProduct(Jacobian, geometry, config); /*--- Build preconditioner. ---*/ @@ -1643,6 +1646,9 @@ unsigned long CSysSolve::Solve_b(CSysMatrix& Jacobian, c /*--- Set up preconditioner and matrix-vector product ---*/ +#ifdef HAVE_CUDA + if (config->GetCUDA()) Jacobian.HtDTransfer(); +#endif auto mat_vec = CSysMatrixVectorProduct(Jacobian, geometry, config); const auto kindPrec = static_cast(KindPrecond); diff --git a/SU2_CFD/include/integration/CNewtonIntegration.hpp b/SU2_CFD/include/integration/CNewtonIntegration.hpp index da8ff67930a..fc27d5b77c0 100644 --- a/SU2_CFD/include/integration/CNewtonIntegration.hpp +++ b/SU2_CFD/include/integration/CNewtonIntegration.hpp @@ -158,6 +158,9 @@ class CNewtonIntegration final : public CIntegration { (*preconditioner)(u, v); return 0; } +#ifdef HAVE_CUDA + if (config->GetCUDA()) solvers[FLOW_SOL]->Jacobian.HtDTransfer(); +#endif auto product = CSysMatrixVectorProduct(solvers[FLOW_SOL]->Jacobian, geometry, config); v = MixedScalar(0.0); MixedScalar eps_t = eps; From 55385ddfd2af0a41758aaf15676bdf4625000d14 Mon Sep 17 00:00:00 2001 From: LwhJesse <256257451+LwhJesse@users.noreply.github.com> Date: Wed, 6 May 2026 22:14:30 +0800 Subject: [PATCH 2/3] Move CUDA Jacobian upload into CSysMatrixVectorProduct --- Common/include/linear_algebra/CMatrixVectorProduct.hpp | 6 +++++- Common/src/linear_algebra/CSysSolve.cpp | 6 ------ SU2_CFD/include/integration/CNewtonIntegration.hpp | 3 --- 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/Common/include/linear_algebra/CMatrixVectorProduct.hpp b/Common/include/linear_algebra/CMatrixVectorProduct.hpp index 878bb132b98..e6baca2acf1 100644 --- a/Common/include/linear_algebra/CMatrixVectorProduct.hpp +++ b/Common/include/linear_algebra/CMatrixVectorProduct.hpp @@ -82,7 +82,11 @@ class CSysMatrixVectorProduct final : public CMatrixVectorProduct { */ inline CSysMatrixVectorProduct(const CSysMatrix& matrix_ref, CGeometry* geometry_ref, const CConfig* config_ref) - : matrix(matrix_ref), geometry(geometry_ref), config(config_ref) {} + : matrix(matrix_ref), geometry(geometry_ref), config(config_ref) { +#ifdef HAVE_CUDA + if (config->GetCUDA()) matrix.HtDTransfer(); +#endif + } /*! * \note This class cannot be default constructed as that would leave us with invalid pointers. diff --git a/Common/src/linear_algebra/CSysSolve.cpp b/Common/src/linear_algebra/CSysSolve.cpp index ad57bfb179b..953bed0ef2a 100644 --- a/Common/src/linear_algebra/CSysSolve.cpp +++ b/Common/src/linear_algebra/CSysSolve.cpp @@ -1464,9 +1464,6 @@ unsigned long CSysSolve::Solve(CSysMatrix& Jacobian, con HandleTemporariesIn(LinSysRes, LinSysSol); -#ifdef HAVE_CUDA - if (config->GetCUDA()) Jacobian.HtDTransfer(); -#endif auto mat_vec = CSysMatrixVectorProduct(Jacobian, geometry, config); /*--- Build preconditioner. ---*/ @@ -1646,9 +1643,6 @@ unsigned long CSysSolve::Solve_b(CSysMatrix& Jacobian, c /*--- Set up preconditioner and matrix-vector product ---*/ -#ifdef HAVE_CUDA - if (config->GetCUDA()) Jacobian.HtDTransfer(); -#endif auto mat_vec = CSysMatrixVectorProduct(Jacobian, geometry, config); const auto kindPrec = static_cast(KindPrecond); diff --git a/SU2_CFD/include/integration/CNewtonIntegration.hpp b/SU2_CFD/include/integration/CNewtonIntegration.hpp index fc27d5b77c0..da8ff67930a 100644 --- a/SU2_CFD/include/integration/CNewtonIntegration.hpp +++ b/SU2_CFD/include/integration/CNewtonIntegration.hpp @@ -158,9 +158,6 @@ class CNewtonIntegration final : public CIntegration { (*preconditioner)(u, v); return 0; } -#ifdef HAVE_CUDA - if (config->GetCUDA()) solvers[FLOW_SOL]->Jacobian.HtDTransfer(); -#endif auto product = CSysMatrixVectorProduct(solvers[FLOW_SOL]->Jacobian, geometry, config); v = MixedScalar(0.0); MixedScalar eps_t = eps; From e38dfc00e56dad7802982642c9f5d0c24b1bb788 Mon Sep 17 00:00:00 2001 From: LwhJesse <256257451+LwhJesse@users.noreply.github.com> Date: Fri, 8 May 2026 20:24:15 +0800 Subject: [PATCH 3/3] Defer CUDA matrix upload to first matvec use --- .../include/linear_algebra/CMatrixVectorProduct.hpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/Common/include/linear_algebra/CMatrixVectorProduct.hpp b/Common/include/linear_algebra/CMatrixVectorProduct.hpp index e6baca2acf1..a0cecaa63d7 100644 --- a/Common/include/linear_algebra/CMatrixVectorProduct.hpp +++ b/Common/include/linear_algebra/CMatrixVectorProduct.hpp @@ -72,6 +72,7 @@ class CSysMatrixVectorProduct final : public CMatrixVectorProduct { const CSysMatrix& matrix; /*!< \brief pointer to matrix that defines the product. */ CGeometry* geometry; /*!< \brief geometry associated with the matrix. */ const CConfig* config; /*!< \brief config of the problem. */ + mutable bool matrix_uploaded = false; /*!< \brief Upload the matrix lazily on the first actual GPU matvec. */ public: /*! @@ -82,11 +83,7 @@ class CSysMatrixVectorProduct final : public CMatrixVectorProduct { */ inline CSysMatrixVectorProduct(const CSysMatrix& matrix_ref, CGeometry* geometry_ref, const CConfig* config_ref) - : matrix(matrix_ref), geometry(geometry_ref), config(config_ref) { -#ifdef HAVE_CUDA - if (config->GetCUDA()) matrix.HtDTransfer(); -#endif - } + : matrix(matrix_ref), geometry(geometry_ref), config(config_ref) {} /*! * \note This class cannot be default constructed as that would leave us with invalid pointers. @@ -101,6 +98,10 @@ class CSysMatrixVectorProduct final : public CMatrixVectorProduct { inline void operator()(const CSysVector& u, CSysVector& v) const override { if (config->GetCUDA()) { #ifdef HAVE_CUDA + if (!matrix_uploaded) { + matrix.HtDTransfer(); + matrix_uploaded = true; + } matrix.GPUMatrixVectorProduct(u, v, geometry, config); #else SU2_MPI::Error(