OpenMathLib · moluopro · Jul 3, 2026 · Jul 3, 2026 · Jul 3, 2026 · Jul 3, 2026
diff --git a/.github/workflows/apple_m.yml b/.github/workflows/apple_m.yml
@@ -1,6 +1,14 @@
 name: apple m
 
-on: [push, pull_request]
+on:
+  push:
+    paths-ignore:
+      - 'docs/**'
+      - '**/*.md'
+  pull_request:
+    paths-ignore:
+      - 'docs/**'
+      - '**/*.md'
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}

diff --git a/.github/workflows/arm64_graviton.yml b/.github/workflows/arm64_graviton.yml
@@ -5,10 +5,16 @@ on:
     branches:
       - develop
       - release-**
+    paths-ignore:
+      - 'docs/**'
+      - '**/*.md'
   pull_request:
     branches:
       - develop
       - release-**
+    paths-ignore:
+      - 'docs/**'
+      - '**/*.md'
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}

diff --git a/.github/workflows/c910v.yml b/.github/workflows/c910v.yml
@@ -1,6 +1,14 @@
 name: c910v qemu test
 
-on: [push, pull_request]
+on:
+  push:
+    paths-ignore:
+      - 'docs/**'
+      - '**/*.md'
+  pull_request:
+    paths-ignore:
+      - 'docs/**'
+      - '**/*.md'
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}

diff --git a/.github/workflows/codspeed-bench.yml b/.github/workflows/codspeed-bench.yml
@@ -1,6 +1,14 @@
 name: Run codspeed benchmarks
 
-on: [push, pull_request]
+on:
+  push:
+    paths-ignore:
+      - 'docs/**'
+      - '**/*.md'
+  pull_request:
+    paths-ignore:
+      - 'docs/**'
+      - '**/*.md'
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}

diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
@@ -4,9 +4,17 @@ on:
   push:
     branches:
       - develop
+    paths:
+      - 'docs/**'
+      - 'mkdocs.yml'
+      - '.github/workflows/docs.yml'
   pull_request:
     branches:
       - develop
+    paths:
+      - 'docs/**'
+      - 'mkdocs.yml'
+      - '.github/workflows/docs.yml'
 
 jobs:
   build:

diff --git a/.github/workflows/dynamic_arch.yml b/.github/workflows/dynamic_arch.yml
@@ -1,6 +1,15 @@
 name: continuous build
 
-on: [push, pull_request, workflow_dispatch]
+on:
+  push:
+    paths-ignore:
+      - 'docs/**'
+      - '**/*.md'
+  pull_request:
+    paths-ignore:
+      - 'docs/**'
+      - '**/*.md'
+  workflow_dispatch:
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}

diff --git a/.github/workflows/harmonyos.yml b/.github/workflows/harmonyos.yml
@@ -1,6 +1,14 @@
 name: harmonyos
 
-on: [push, pull_request]
+on:
+  push:
+    paths-ignore:
+      - 'docs/**'
+      - '**/*.md'
+  pull_request:
+    paths-ignore:
+      - 'docs/**'
+      - '**/*.md'
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}

diff --git a/.github/workflows/loongarch64.yml b/.github/workflows/loongarch64.yml
@@ -1,6 +1,14 @@
 name: loongarch64 qemu test
 
-on: [push, pull_request]
+on:
+  push:
+    paths-ignore:
+      - 'docs/**'
+      - '**/*.md'
+  pull_request:
+    paths-ignore:
+      - 'docs/**'
+      - '**/*.md'
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}

diff --git a/.github/workflows/loongarch64_clang.yml b/.github/workflows/loongarch64_clang.yml
@@ -1,6 +1,14 @@
 name: loongarch64 clang qemu test
 
-on: [push, pull_request]
+on:
+  push:
+    paths-ignore:
+      - 'docs/**'
+      - '**/*.md'
+  pull_request:
+    paths-ignore:
+      - 'docs/**'
+      - '**/*.md'
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}

diff --git a/.github/workflows/mips64.yml b/.github/workflows/mips64.yml
@@ -1,6 +1,14 @@
 name: mips64 qemu test
 
-on: [push, pull_request]
+on:
+  push:
+    paths-ignore:
+      - 'docs/**'
+      - '**/*.md'
+  pull_request:
+    paths-ignore:
+      - 'docs/**'
+      - '**/*.md'
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}

diff --git a/.github/workflows/riscv64_vector.yml b/.github/workflows/riscv64_vector.yml
@@ -1,6 +1,14 @@
 name: riscv64 zvl256b qemu test
 
-on: [push, pull_request]
+on:
+  push:
+    paths-ignore:
+      - 'docs/**'
+      - '**/*.md'
+  pull_request:
+    paths-ignore:
+      - 'docs/**'
+      - '**/*.md'
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}

diff --git a/.github/workflows/windows_arm64.yml b/.github/workflows/windows_arm64.yml
@@ -4,9 +4,15 @@ on:
   push:
     branches:
       - develop
+    paths-ignore:
+      - 'docs/**'
+      - '**/*.md'
   pull_request:
     branches:
       - develop
+    paths-ignore:
+      - 'docs/**'
+      - '**/*.md'
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}

diff --git a/docs/build_system.md b/docs/build_system.md
@@ -98,7 +98,7 @@ though - please read the linked Makefiles if you want to see all variables.
 - `BUILD_DOUBLE`: build the double-precision real functions
 - `BUILD_COMPLEX`: build the single-precision complex functions
 - `BUILD_COMPLEX16`: build the double-precision complex functions
-- `BUILD_BFLOAT16`: build the "half precision brainfloat" real functions 
+- `BUILD_BFLOAT16`: build the `bfloat16` real functions
 - `EXPRECISION`: (do not use, this is a work in progress) option to use `long
   double` functions
 

diff --git a/docs/developers.md b/docs/developers.md
@@ -80,8 +80,8 @@ According to the above `KERNEL.HASWELL`, OpenBLAS Haswell dgemm kernel file is `
     ["Anatomy of High-Performance Matrix Multiplication"](http://delivery.acm.org/10.1145/1360000/1356053/a12-goto.pdf?ip=155.68.162.54&id=1356053&acc=ACTIVE%20SERVICE&key=A79D83B43E50B5B8%2EF070BBE7E45C3F17%2E4D4702B0C3E38B35%2E4D4702B0C3E38B35&__acm__=1517932837_edfe766f1e295d9a7830812371e1d173).
     ACM Transactions on Mathematical Software 34 (3): Article 12
 
-    (The above link is available only to ACM members, but this and many related
-    papers is also available on [the pages of van de Geijn's FLAME project](http://www.cs.utexas.edu/~flame/web/FLAMEPublications.html))
+    (The above link is available only to ACM members, but this paper and many
+    related papers are also available on [the pages of van de Geijn's FLAME project](http://www.cs.utexas.edu/~flame/web/FLAMEPublications.html).)
 
 The `driver/level3/level3.c` is the implementation of Goto's algorithm.
 Meanwhile, you can look at `kernel/generic/gemmkernel_2x2.c`, which is a naive

diff --git a/docs/distributing.md b/docs/distributing.md
@@ -4,13 +4,14 @@
     This document contains recommendations only - packagers and other
     redistributors are in charge of how OpenBLAS is built and distributed in their
     systems, and may have good reasons to deviate from the guidance given on this
-    page. These recommendations are aimed at general packaging systems, with a user
-    base that typically is large, open source (or freely available at least), and
-    doesn't behave uniformly or that the packager is directly connected with.*
+    page. These recommendations are aimed at general packaging systems that are
+    open source (or at least freely available) and typically serve a large user
+    base that does not behave uniformly and is not directly connected with the
+    packager.
 
 OpenBLAS has a large number of build-time options which can be used to change
 how it behaves at runtime, how artifacts or symbols are named, etc. Variation
-in build configuration can be necessary to acheive a given end goal within a
+in build configuration can be necessary to achieve a given end goal within a
 distribution or as an end user. However, such variation can also make it more
 difficult to build on top of OpenBLAS and ship code or other packages in a way
 that works across many different distros. Here we provide guidance about the
@@ -42,7 +43,7 @@ settings):
    while it does make up a significant part of the binary size of the installed
    library, that does not outweigh the regression in usability when deviating
    from the default here.[^1]
-3. Always distribute the pkg-config (`.pc`) and CMake `.cmake`) dependency
+3. Always distribute the pkg-config (`.pc`) and CMake (`.cmake`) dependency
    detection files. These files are used by build systems when users want to
    link against OpenBLAS, and there is no benefit of leaving them out.
 4. Provide the LP64 interface by default, and if in addition to that you choose

diff --git a/docs/extensions.md b/docs/extensions.md
@@ -13,9 +13,8 @@ This page documents those non-standard APIs.
 | ?omatcopy     | s,d,c,z       | out-of-place transposition/copying              |
 | ?geadd        | s,d,c,z       | ATLAS-like matrix add `B = &alpha;*A+&beta;*B`  |
 | ?gemmt        | s,d,c,z       | `gemm` but only a triangular part updated       |
-| cblas_?gemm_batch | s,d,c,z,b | `gemm` with several groups of input data
-|
-| cblas_?gemm_batch_strided | s,d,c,z,b | `gemm` with groups of data stored at fixed offsets in the input arrays 
+| cblas_?gemm_batch | s,d,c,z,b | `gemm` with several groups of input data |
+| cblas_?gemm_batch_strided | s,d,c,z,b | `gemm` with groups of data stored at fixed offsets in the input arrays |
 
 ## bfloat16 functionality
 
@@ -48,4 +47,3 @@ BLAS-like and conversion functions for `hfloat16` (available when OpenBLAS was c
 * `int openblas_set_affinity(int thread_index, size_t cpusetsize, cpu_set_t *cpuset)` sets the CPU affinity mask of the given thread
   to the provided cpuset. Only available on Linux, with semantics identical to `pthread_setaffinity_np`.
 * `openblas_set_thread_callback_function` overrides the default multithreading backend with the provided argument
-
diff --git a/docs/faq.md b/docs/faq.md
@@ -110,7 +110,7 @@ Zaheer has fixed this bug. You can now use the structure instead of C99 complex
 
 ### <a name="Linux_SEGFAULT"></a>I get a SEGFAULT with multi-threading on Linux. What's wrong?
 
-This may be related to a bug in the Linux kernel 2.6.32 (?). Try applying the patch segaults.patch to disable mbind using
+This may be related to a bug in the Linux kernel 2.6.32 (?). Try applying the patch segfaults.patch to disable mbind using
 
      patch < segfaults.patch
 
@@ -213,7 +213,7 @@ AVX-512 (SKYLAKEX) support requires devtoolset-8-gcc-gfortran (which exceeds for
 
 ### <a name="qemu"></a>Building OpenBLAS in QEMU/KVM/XEN
 
-By default, QEMU reports the CPU as "QEMU Virtual CPU version 2.2.0", which shares CPUID with existing 32bit CPU even in 64bit virtual machine, and OpenBLAS recognizes it as PENTIUM2. Depending on the exact combination of CPU features the hypervisor choses to expose, this may not correspond to any CPU that exists, and OpenBLAS will error when trying to build. To fix this, pass `-cpu host` or `-cpu passthough` to QEMU, or another CPU model.
+By default, QEMU reports the CPU as "QEMU Virtual CPU version 2.2.0", which shares its CPUID with an existing 32-bit CPU even in a 64-bit virtual machine, and OpenBLAS recognizes it as PENTIUM2. Depending on the exact combination of CPU features the hypervisor chooses to expose, this may not correspond to any CPU that exists, and OpenBLAS will fail with an error when trying to build. To fix this, pass `-cpu host` to QEMU, or choose another CPU model.
 Similarly, the XEN hypervisor may not pass through all features of the host cpu while reporting the cpu type itself correctly, which can
 lead to compiler error messages about an "ABI change" when compiling AVX512 code. Again changing the Xen configuration by running e.g. 
 "xen-cmdline --set-xen cpuid=avx512" should get around this (as would building OpenBLAS for an older cpu lacking that particular feature, e.g. TARGET=HASWELL)
@@ -290,7 +290,7 @@ There have been a few reports of wrong calculation results and build-time test f
 
 ### <a name="allocmorebuffers"></a>Program is Terminated. Because you tried to allocate too many memory regions
 
-In OpenBLAS, we mange a pool of memory buffers and allocate the number of buffers as the following.
+In OpenBLAS, we manage a pool of memory buffers and set the number of buffers as follows.
 ```
 #define NUM_BUFFERS (MAX_CPU_NUMBER * 2)
 ```
@@ -301,7 +301,7 @@ In `Makefile.system`, we will set `MAX_CPU_NUMBER=NUM_THREADS`.
 
 ### <a name="choose_target_dynamic"></a>How to choose TARGET manually at runtime when compiled with DYNAMIC_ARCH
 
-The environment variable which control the kernel selection is `OPENBLAS_CORETYPE` (see `driver/others/dynamic.c`)
+The environment variable that controls the kernel selection is `OPENBLAS_CORETYPE` (see `driver/others/dynamic.c`)
 e.g. `export OPENBLAS_CORETYPE=Haswell`. And the function `char* openblas_get_corename()` returns the used target.
 
 ### <a name="missgoto"></a>After updating the installed OpenBLAS, a program complains about "undefined symbol gotoblas"
@@ -325,7 +325,7 @@ Specifying the "correct" library location with the `-L` flag (like `-L /opt/Open
 
 ### <a name="cudahpl"></a>I want to use OpenBLAS with CUDA in the HPL 2.3 benchmark code but it keeps looking for Intel MKL
 
-You need to edit file src/cuda/cuda_dgemm.c in the NVIDIA version of HPL, change the "handle2" and "handle" dlopen calls to use libopenblas.so instead of libmkl_intel_lp64.so, and add an trailing underscore in the dlsym lines for dgemm_mkl and dtrsm_mkl (like  `dgemm_mkl = (void(*)())dlsym(handle, “dgemm_”);`)
+You need to edit the file `src/cuda/cuda_dgemm.c` in the NVIDIA version of HPL, change the "handle2" and "handle" dlopen calls to use libopenblas.so instead of libmkl_intel_lp64.so, and add a trailing underscore in the dlsym lines for dgemm_mkl and dtrsm_mkl (like `dgemm_mkl = (void(*)())dlsym(handle, "dgemm_");`)
 
 ### <a name="cpusoffline"></a>Multithreaded OpenBLAS runs no faster or is even slower than singlethreaded on my ARMV7 board
 

diff --git a/docs/user_manual.md b/docs/user_manual.md
@@ -107,9 +107,9 @@ OpenBLAS can be used as a shared or a static library.
 
 ### Link a shared library
 
-The shared library is normally called `libopenblas.so`, but not that the name
+The shared library is normally called `libopenblas.so`, but note that the name
 may be different as a result of build flags used or naming choices by a distro
-packager (see [distributing.md] for details). To link a shared library named
+packager (see [distributing.md](distributing.md) for details). To link a shared library named
 `libopenblas.so`, the flag `-lopenblas` is needed. To find the OpenBLAS headers,
 a `-I/path/to/includedir` is needed. And unless the library is installed in a
 directory that the linker searches by default, also `-L` and `-Wl,-rpath` flags