diff --git a/include/xsimd/types/xsimd_avxvnni_register.hpp b/include/xsimd/types/xsimd_avxvnni_register.hpp
index e1e1ee964..c46e8ec16 100644
--- a/include/xsimd/types/xsimd_avxvnni_register.hpp
+++ b/include/xsimd/types/xsimd_avxvnni_register.hpp
@@ -13,6 +13,7 @@
 #define XSIMD_AVXVNNI_REGISTER_HPP
 
 #include "./xsimd_avx2_register.hpp"
+#include "./xsimd_fma3_avx2_register.hpp"
 
 namespace xsimd
 {
@@ -21,7 +22,12 @@ namespace xsimd
      *
      * AVXVNNI instructions
      */
-    struct avxvnni : avx2
+    // Derive from fma3<avx2> rather than avx2 so the FMA3 kernels (fnma/fnms ->
+    // vfnmadd) are in avxvnni's dispatch chain instead of the generic neg(x*y)+z
+    // fallback. fma3<avx2> always derives from avx2 and its kernels are only
+    // registered when XSIMD_WITH_FMA3_AVX2, so when FMA is disabled this base is
+    // transparent (dispatch falls straight through to avx2).
+    struct avxvnni : fma3<avx2>
     {
         static constexpr bool supported() noexcept { return XSIMD_WITH_AVXVNNI; }
         static constexpr bool available() noexcept { return true; }