Skip to content

Commit 53d0be8

Browse files
committed
Add WebAssembly SIMD backend for universal intrinsics
1 parent 7a95460 commit 53d0be8

4 files changed

Lines changed: 57 additions & 2 deletions

File tree

kernel/simd/intrin.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,11 @@ extern "C" {
5656
#include <arm_neon.h>
5757
#endif
5858

59+
/** WASM SIMD **/
60+
#if defined(ARCH_WASM) && defined(__wasm_simd128__)
61+
#include <wasm_simd128.h>
62+
#endif
63+
5964
// distribute
6065
#if defined(HAVE_AVX512VL) || defined(HAVE_AVX512BF16)
6166
#include "intrin_avx512.h"
@@ -69,6 +74,10 @@ extern "C" {
6974
#include "intrin_neon.h"
7075
#endif
7176

77+
#if defined(ARCH_WASM) && defined(__wasm_simd128__)
78+
#include "intrin_wasm.h"
79+
#endif
80+
7281
#ifndef V_SIMD
7382
#define V_SIMD 0
7483
#define V_SIMD_F64 0

kernel/simd/intrin_wasm.h

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
#include <wasm_simd128.h>
2+
3+
#define V_SIMD 128
4+
#define V_SIMD_F64 1
5+
6+
/***************************
7+
* Data Type
8+
***************************/
9+
typedef v128_t v_f32;
10+
typedef v128_t v_f64;
11+
#define v_nlanes_f32 4
12+
#define v_nlanes_f64 2
13+
14+
/***************************
15+
* Arithmetic
16+
***************************/
17+
#define v_add_f32 wasm_f32x4_add
18+
#define v_add_f64 wasm_f64x2_add
19+
#define v_sub_f32 wasm_f32x4_sub
20+
#define v_sub_f64 wasm_f64x2_sub
21+
#define v_mul_f32 wasm_f32x4_mul
22+
#define v_mul_f64 wasm_f64x2_mul
23+
24+
/* Multiply-add for f32 vectors: returns a*b + c, composed from v_mul_f32
 * followed by v_add_f32 (two separate operations, not one fused call). */
BLAS_FINLINE v_f32 v_muladd_f32(v_f32 a, v_f32 b, v_f32 c)
25+
{ return v_add_f32(v_mul_f32(a, b), c); }
26+
27+
/* Multiply-add for f64 vectors: returns a*b + c, composed from v_mul_f64
 * followed by v_add_f64 (two separate operations, not one fused call). */
BLAS_FINLINE v_f64 v_muladd_f64(v_f64 a, v_f64 b, v_f64 c)
28+
{ return v_add_f64(v_mul_f64(a, b), c); }
29+
30+
/* Multiply-subtract for f32 vectors: returns a*b - c, composed from
 * v_mul_f32 followed by v_sub_f32 (the product is the minuend). */
BLAS_FINLINE v_f32 v_mulsub_f32(v_f32 a, v_f32 b, v_f32 c)
31+
{ return v_sub_f32(v_mul_f32(a, b), c); }
32+
33+
/* Multiply-subtract for f64 vectors: returns a*b - c, composed from
 * v_mul_f64 followed by v_sub_f64 (the product is the minuend). */
BLAS_FINLINE v_f64 v_mulsub_f64(v_f64 a, v_f64 b, v_f64 c)
34+
{ return v_sub_f64(v_mul_f64(a, b), c); }
35+
36+
/***************************
37+
* memory
38+
***************************/
39+
/* Load one 128-bit vector from address `a` via wasm_v128_load.
 * `a` is parenthesized so an expression argument (e.g. base + offset) is
 * cast as a whole instead of only its first operand. */
#define v_loadu_f32(a) wasm_v128_load((const float*)(a))
40+
/* Load one 128-bit vector from address `a` via wasm_v128_load.
 * `a` is parenthesized so an expression argument (e.g. base + offset) is
 * cast as a whole instead of only its first operand. */
#define v_loadu_f64(a) wasm_v128_load((const double*)(a))
41+
/* Store the 128-bit vector `v` to address `a` via wasm_v128_store.
 * Both parameters are parenthesized so expression arguments are evaluated
 * as a unit before the pointer cast is applied. */
#define v_storeu_f32(a, v) wasm_v128_store((float*)(a), (v))
42+
/* Store the 128-bit vector `v` to address `a` via wasm_v128_store.
 * Both parameters are parenthesized so expression arguments are evaluated
 * as a unit before the pointer cast is applied. */
#define v_storeu_f64(a, v) wasm_v128_store((double*)(a), (v))
43+
#define v_setall_f32(VAL) wasm_f32x4_splat(VAL)
44+
#define v_setall_f64(VAL) wasm_f64x2_splat(VAL)
45+
#define v_zero_f32() wasm_f32x4_splat(0.0f)
46+
#define v_zero_f64() wasm_f64x2_splat(0.0)

kernel/x86_64/drot.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
static void drot_kernel(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT c, FLOAT s)
1313
{
1414
BLASLONG i = 0;
15-
#if V_SIMD_F64 && V_SIMD > 256
15+
#if V_SIMD_F64 && (V_SIMD > 256 || defined(ARCH_WASM))
1616
const int vstep = v_nlanes_f64;
1717
const int unrollx4 = n & (-vstep * 4);
1818
const int unrollx = n & -vstep;

kernel/x86_64/srot.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ static void srot_kernel(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT c, FLOAT s)
1313
{
1414
BLASLONG i = 0;
1515

16-
#if V_SIMD && !defined(C_PGI) && (defined(HAVE_FMA3) || V_SIMD > 128)
16+
#if V_SIMD && !defined(C_PGI) && (defined(HAVE_FMA3) || V_SIMD > 128 || defined(ARCH_WASM))
1717
const int vstep = v_nlanes_f32;
1818
const int unrollx4 = n & (-vstep * 4);
1919
const int unrollx = n & -vstep;

0 commit comments

Comments
 (0)