Skip to content

Commit 47c19fd

Browse files
committed
fix: route avx512vl_128 FMA ops through fma3<avx2_128>
avx512vl_128 previously derived from avx2_128, whose dispatch chain reaches no FMA3 kernel provider, so fnma/fnms fell back to the generic neg(x*y)+/-z form (an extra vxorpd ahead of vfmadd) even on a real AVX-512 build. Its 256-bit sibling avx512vl_256 already derives from fma3<avx2> and was unaffected. Apply the same fma3<> inheritance pattern as avxvnni and avx512vl_256: introduce fma3<avx2_128> (a strict superset of avx2_128 that adds the 128-bit FMA3 kernels, built by re-including xsimd_fma3_sse.hpp with sse4_2 -> avx2_128, exactly as fma3<avx2> is built from xsimd_fma3_avx.hpp), and derive avx512vl_128 from it. Because fma3<avx2_128> : avx2_128, all avx2_128 integer kernels remain reachable -- FMA is added on top, nothing lost. The fma3<avx2_128> tag type itself is always declared in xsimd_fma3_avx2_128_register.hpp; what is gated by XSIMD_WITH_FMA3_AVX2 is its supported() value, its register alias and the inclusion of its kernel header, so without FMA the kernels are absent and dispatch falls through to avx2_128/common unchanged. Validated under Intel SDE -skx: avx512vl_128 fnma/fnms/fma/fms/fmas emit 128-bit vf* (no vxorpd) and match scalar std::fma for float and double; avxvnni and fma3<avx2> codegen unchanged; all FMA-capable x86 arches compile across sse/avx2/avxvnni/avx512 flag sets.
1 parent e79f9d3 commit 47c19fd

5 files changed

Lines changed: 100 additions & 1 deletion

File tree

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
/***************************************************************************
2+
* Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
3+
* Martin Renou *
4+
* Copyright (c) QuantStack *
5+
* Copyright (c) Serge Guelton *
6+
* *
7+
* Distributed under the terms of the BSD 3-Clause License. *
8+
* *
9+
* The full license is in the file LICENSE, distributed with this software. *
10+
****************************************************************************/
11+
12+
#ifndef XSIMD_FMA3_AVX2_128_HPP
13+
#define XSIMD_FMA3_AVX2_128_HPP
14+
15+
#include "../types/xsimd_fma3_avx2_128_register.hpp"
16+
17+
// Allow inclusion of xsimd_fma3_sse.hpp
18+
#ifdef XSIMD_FMA3_SSE_HPP
19+
#undef XSIMD_FMA3_SSE_HPP
20+
#define XSIMD_FORCE_FMA3_SSE_HPP
21+
#endif
22+
23+
// Disallow inclusion of ./xsimd_fma3_sse_register.hpp
24+
#ifndef XSIMD_FMA3_SSE_REGISTER_HPP
25+
#define XSIMD_FMA3_SSE_REGISTER_HPP
26+
#define XSIMD_FORCE_FMA3_SSE_REGISTER_HPP
27+
#endif
28+
29+
// Include ./xsimd_fma3_sse.hpp but s/sse4_2/avx2_128
30+
#define sse4_2 avx2_128
31+
#include "./xsimd_fma3_sse.hpp"
32+
#undef sse4_2
33+
#undef XSIMD_FMA3_SSE_HPP
34+
35+
// Carefully restore guards
36+
#ifdef XSIMD_FORCE_FMA3_SSE_HPP
37+
#define XSIMD_FMA3_SSE_HPP
38+
#undef XSIMD_FORCE_FMA3_SSE_HPP
39+
#endif
40+
41+
#ifdef XSIMD_FORCE_FMA3_SSE_REGISTER_HPP
42+
#undef XSIMD_FMA3_SSE_REGISTER_HPP
43+
#undef XSIMD_FORCE_FMA3_SSE_REGISTER_HPP
44+
#endif
45+
46+
#endif

include/xsimd/arch/xsimd_isa.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@
7171
#endif
7272

7373
#if XSIMD_WITH_FMA3_AVX2
74+
#include "./xsimd_fma3_avx2_128.hpp"
7475
#include "./xsimd_fma3_avx2.hpp"
7576
#endif
7677

include/xsimd/types/xsimd_all_registers.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "./xsimd_avx512vnni_avx512vbmi2_register.hpp"
2525
#include "./xsimd_avx_register.hpp"
2626
#include "./xsimd_avxvnni_register.hpp"
27+
#include "./xsimd_fma3_avx2_128_register.hpp"
2728
#include "./xsimd_fma3_avx2_register.hpp"
2829
#include "./xsimd_fma3_avx_register.hpp"
2930
#include "./xsimd_fma3_sse_register.hpp"

include/xsimd/types/xsimd_avx512vl_register.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#define XSIMD_AVX512VL_REGISTER_HPP
1414

1515
#include "./xsimd_avx512cd_register.hpp"
16+
#include "./xsimd_fma3_avx2_128_register.hpp"
1617

1718
namespace xsimd
1819
{
@@ -34,7 +35,7 @@ namespace xsimd
3435
*
3536
* AVX512VL instructions extension for 128 bits registers
3637
*/
37-
struct avx512vl_128 : avx2_128
38+
struct avx512vl_128 : fma3<avx2_128>
3839
{
3940
static constexpr bool supported() noexcept { return XSIMD_WITH_AVX512VL; }
4041
static constexpr bool available() noexcept { return true; }
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
/***************************************************************************
2+
* Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
3+
* Martin Renou *
4+
* Copyright (c) QuantStack *
5+
* Copyright (c) Serge Guelton *
6+
* *
7+
* Distributed under the terms of the BSD 3-Clause License. *
8+
* *
9+
* The full license is in the file LICENSE, distributed with this software. *
10+
****************************************************************************/
11+
12+
#ifndef XSIMD_FMA3_AVX2_128_REGISTER_HPP
13+
#define XSIMD_FMA3_AVX2_128_REGISTER_HPP
14+
15+
#include "./xsimd_avx2_register.hpp"
16+
17+
namespace xsimd
18+
{
19+
template <typename arch>
20+
struct fma3;
21+
22+
/**
23+
* @ingroup architectures
24+
*
25+
* AVX2 + FMA instructions, for 128 bits registers
26+
*/
27+
template <>
28+
struct fma3<avx2_128> : avx2_128
29+
{
30+
static constexpr bool supported() noexcept { return XSIMD_WITH_FMA3_AVX2; }
31+
static constexpr bool available() noexcept { return true; }
32+
static constexpr char const* name() noexcept { return "fma3+avx2/128"; }
33+
};
34+
35+
#if XSIMD_WITH_FMA3_AVX2
36+
37+
#if !XSIMD_WITH_AVX2
38+
#error "architecture inconsistency: fma3+avx2/128 requires avx2"
39+
#endif
40+
41+
namespace types
42+
{
43+
44+
XSIMD_DECLARE_SIMD_REGISTER_ALIAS(fma3<avx2_128>, avx2_128);
45+
46+
}
47+
#endif
48+
49+
}
50+
#endif

0 commit comments

Comments
 (0)