Skip to content

Add SWAR versions of Base validations#15357

Merged
josevalim merged 2 commits into elixir-lang:main from PJUllrich:main
May 11, 2026
Merged

Add SWAR versions of Base validations#15357
josevalim merged 2 commits into elixir-lang:main from PJUllrich:main

Conversation

@PJUllrich
Copy link
Copy Markdown
Contributor

@PJUllrich PJUllrich commented May 11, 2026

Disclosure: I researched and wrote the following changes with Claude Opus 4.7 but understand the proposed changes and how SWAR works generally. I also wrote the description below myself.

I applied a SWAR (SIMD Within A Register) approach to all Base.validX? functions and benchmarked them against the baseline, but could find performance improvements of roughly 25% only for valid16? and valid32?. My assumption is that in the 64 case, there are simply too many operations in a single SWAR procedure which makes it slower than the current tuple lookup. Weirdly, I could not find any improvements in hex_valid32? although it has the same number of range checks as valid32?.

I will post the benchmark and results in separate comments below.

EDIT:

The following PRs inspired this work:
SWAR-optimize ASCII fast-path: #15255
Use 56-bit SWAR to accelerate binary ASCII traversal: erlang/otp#10948

@PJUllrich
Copy link
Copy Markdown
Contributor Author

Benchmark
# SWAR exploration bench for Base.valid* functions.
#
# Run with the SYSTEM elixir from anywhere:
#   elixir bench_base_swar.ex                  # all benchmarks
#   elixir bench_base_swar.ex valid64          # just valid64?
#   elixir bench_base_swar.ex valid64 valid32  # both
#
# Recognised names: valid16, valid32, hex_valid32, valid64, url_valid64.
# Equivalence checks always run regardless of selection (they're fast).
#
# Self-contained: the BASELINE is a copy of the current valid* logic from
# lib/elixir/lib/base.ex, inlined into a `Baseline` module so the bench does
# not depend on `make stdlib`. The SWAR variants live in `Optimised`, in the
# same style as PR #15255.

# Installs the Benchee dependency for this standalone script.
Mix.install([{:benchee, "~> 1.5"}])

# Benchmark names given on the command line (see the usage notes at the top
# of the script); an empty list means "run everything".
selected = System.argv()
# Returns true when the benchmark called `name` was selected, or when no
# selection was given at all.
run? = fn name -> selected == [] or name in selected end

defmodule Baseline do
  @moduledoc false

  # Benchmark control: table-lookup validators for base16, base32 (regular and
  # extended-hex) and base64 (standard and URL-safe). Every per-byte check is
  # an `elem/2` lookup into a tuple literal built at compile time from the
  # alphabet.

  # Mirrors the b16 setup in lib/elixir/lib/base.ex.
  b16_alphabet = ~c"0123456789ABCDEF"

  # Rewrites every `{encoding, value}` pair whose encoding is an upper-case
  # letter so that it uses the lower-case letter instead.
  to_lower_dec =
    &Enum.map(&1, fn {encoding, value} = pair ->
      if encoding in ?A..?Z do
        {encoding - ?A + ?a, value}
      else
        pair
      end
    end)

  # Keeps every pair and, for upper-case letters, adds a second pair with the
  # lower-case encoding mapped to the same value.
  to_mixed_dec =
    &Enum.flat_map(&1, fn {encoding, value} = pair ->
      if encoding in ?A..?Z do
        [pair, {encoding - ?A + ?a, value}]
      else
        [pair]
      end
    end)

  # Turns a list of `{encoding, value}` pairs into `{min, values}` where `min`
  # is the smallest encoding and `values` lists, for every code point from the
  # smallest to the largest encoding, its value or `nil` for a gap.
  to_decode_list = fn alphabet ->
    alphabet = Enum.sort(alphabet)
    map = Map.new(alphabet)
    {min, _} = List.first(alphabet)
    {max, _} = List.last(alphabet)
    {min, Enum.map(min..max, &map[&1])}
  end

  # Checks whether `string` is valid base16. The `:case` option selects the
  # accepted alphabet (`:upper` by default, `:lower` or `:mixed`). Binaries
  # with an odd byte size are rejected without looking at their content.
  def valid16?(string, opts \\ [])

  def valid16?(string, opts) when is_binary(string) and rem(byte_size(string), 2) == 0 do
    case Keyword.get(opts, :case, :upper) do
      :upper -> validate16upper?(string)
      :lower -> validate16lower?(string)
      :mixed -> validate16mixed?(string)
    end
  end

  def valid16?(string, _opts) when is_binary(string) do
    false
  end

  upper = Enum.with_index(b16_alphabet)

  # Generates one validator and one per-byte predicate per case variant.
  for {base, alphabet} <- [upper: upper, lower: to_lower_dec.(upper), mixed: to_mixed_dec.(upper)] do
    validate_name = :"validate16#{base}?"
    valid_char_name = :"valid_char16#{base}?"

    {min, decoded} = to_decode_list.(alphabet)

    defp unquote(validate_name)(<<>>), do: true

    # Consumes 8, then 4, then 2 bytes per step, whichever clause matches first.
    defp unquote(validate_name)(<<c1, c2, c3, c4, c5, c6, c7, c8, rest::binary>>) do
      unquote(valid_char_name)(c1) and
        unquote(valid_char_name)(c2) and
        unquote(valid_char_name)(c3) and
        unquote(valid_char_name)(c4) and
        unquote(valid_char_name)(c5) and
        unquote(valid_char_name)(c6) and
        unquote(valid_char_name)(c7) and
        unquote(valid_char_name)(c8) and
        unquote(validate_name)(rest)
    end

    defp unquote(validate_name)(<<c1, c2, c3, c4, rest::binary>>) do
      unquote(valid_char_name)(c1) and
        unquote(valid_char_name)(c2) and
        unquote(valid_char_name)(c3) and
        unquote(valid_char_name)(c4) and
        unquote(validate_name)(rest)
    end

    defp unquote(validate_name)(<<c1, c2, rest::binary>>) do
      unquote(valid_char_name)(c1) and
        unquote(valid_char_name)(c2) and
        unquote(validate_name)(rest)
    end

    defp unquote(validate_name)(<<_char, _rest::binary>>), do: false

    # Per-byte predicate: true when the lookup table holds a non-nil value at
    # `char - min`. An index outside the tuple makes `elem/2` fail, and a
    # failing guard simply means this clause does not match, so the fallback
    # clause below answers false.
    @compile {:inline, [{valid_char_name, 1}]}
    defp unquote(valid_char_name)(char)
         when elem({unquote_splicing(decoded)}, char - unquote(min)) != nil,
         do: true

    defp unquote(valid_char_name)(_char), do: false
  end

  # --- base32 (mirrors lib/elixir/lib/base.ex valid32? machinery) ----------

  b32_alphabet = ~c"ABCDEFGHIJKLMNOPQRSTUVWXYZ234567"
  b32hex_alphabet = ~c"0123456789ABCDEFGHIJKLMNOPQRSTUV"
  upper32 = Enum.with_index(b32_alphabet)
  hexupper32 = Enum.with_index(b32hex_alphabet)

  # Checks whether `string` is valid base32. Options: `:case` (`:upper` by
  # default, `:lower`, `:mixed`) and `:padding` (true by default).
  def valid32?(string, opts \\ []) when is_binary(string) do
    pad? = Keyword.get(opts, :padding, true)

    case Keyword.get(opts, :case, :upper) do
      :upper -> validate32upper?(string, pad?)
      :lower -> validate32lower?(string, pad?)
      :mixed -> validate32mixed?(string, pad?)
    end
  end

  # Same as valid32?/2 but for the extended-hex alphabet.
  def hex_valid32?(string, opts \\ []) when is_binary(string) do
    pad? = Keyword.get(opts, :padding, true)

    case Keyword.get(opts, :case, :upper) do
      :upper -> validate32hexupper?(string, pad?)
      :lower -> validate32hexlower?(string, pad?)
      :mixed -> validate32hexmixed?(string, pad?)
    end
  end

  # Generates, per alphabet/case pair, the entry validator, the loop over
  # whole 8-byte groups and the per-byte predicate.
  for {base, alphabet} <- [
        upper: upper32,
        lower: to_lower_dec.(upper32),
        mixed: to_mixed_dec.(upper32),
        hexupper: hexupper32,
        hexlower: to_lower_dec.(hexupper32),
        hexmixed: to_mixed_dec.(hexupper32)
      ] do
    validate_name = :"validate32#{base}?"
    validate_main_name = :"validate_main32#{base}?"
    valid_char_name = :"valid_char32#{base}?"
    {min, decoded} = to_decode_list.(alphabet)

    defp unquote(validate_main_name)(<<>>), do: true

    defp unquote(validate_main_name)(<<c1, c2, c3, c4, c5, c6, c7, c8, rest::binary>>) do
      unquote(valid_char_name)(c1) and
        unquote(valid_char_name)(c2) and
        unquote(valid_char_name)(c3) and
        unquote(valid_char_name)(c4) and
        unquote(valid_char_name)(c5) and
        unquote(valid_char_name)(c6) and
        unquote(valid_char_name)(c7) and
        unquote(valid_char_name)(c8) and
        unquote(validate_main_name)(rest)
    end

    defp unquote(validate_name)(<<>>, _pad?), do: true

    defp unquote(validate_name)(string, pad?) do
      # `segs` is the number of 8-byte groups that precede the final group;
      # `rest` therefore always holds the last 1 to 8 bytes of the input.
      segs = div(byte_size(string) + 7, 8) - 1
      <<main::size(^segs)-binary-unit(64), rest::binary>> = string
      main_valid? = unquote(validate_main_name)(main)

      # The final group is matched against each accepted layout: padded forms
      # (8 bytes ending in `=`), a full unpadded group, and - only when
      # padding is not required - the shorter unpadded forms.
      case rest do
        _ when not main_valid? ->
          false

        <<c1, c2, ?=, ?=, ?=, ?=, ?=, ?=>> ->
          unquote(valid_char_name)(c1) and unquote(valid_char_name)(c2)

        <<c1, c2, c3, c4, ?=, ?=, ?=, ?=>> ->
          unquote(valid_char_name)(c1) and unquote(valid_char_name)(c2) and
            unquote(valid_char_name)(c3) and unquote(valid_char_name)(c4)

        <<c1, c2, c3, c4, c5, ?=, ?=, ?=>> ->
          unquote(valid_char_name)(c1) and unquote(valid_char_name)(c2) and
            unquote(valid_char_name)(c3) and unquote(valid_char_name)(c4) and
            unquote(valid_char_name)(c5)

        <<c1, c2, c3, c4, c5, c6, c7, ?=>> ->
          unquote(valid_char_name)(c1) and unquote(valid_char_name)(c2) and
            unquote(valid_char_name)(c3) and unquote(valid_char_name)(c4) and
            unquote(valid_char_name)(c5) and unquote(valid_char_name)(c6) and
            unquote(valid_char_name)(c7)

        <<c1, c2, c3, c4, c5, c6, c7, c8>> ->
          unquote(valid_char_name)(c1) and unquote(valid_char_name)(c2) and
            unquote(valid_char_name)(c3) and unquote(valid_char_name)(c4) and
            unquote(valid_char_name)(c5) and unquote(valid_char_name)(c6) and
            unquote(valid_char_name)(c7) and unquote(valid_char_name)(c8)

        <<c1, c2>> when not pad? ->
          unquote(valid_char_name)(c1) and unquote(valid_char_name)(c2)

        <<c1, c2, c3, c4>> when not pad? ->
          unquote(valid_char_name)(c1) and unquote(valid_char_name)(c2) and
            unquote(valid_char_name)(c3) and unquote(valid_char_name)(c4)

        <<c1, c2, c3, c4, c5>> when not pad? ->
          unquote(valid_char_name)(c1) and unquote(valid_char_name)(c2) and
            unquote(valid_char_name)(c3) and unquote(valid_char_name)(c4) and
            unquote(valid_char_name)(c5)

        <<c1, c2, c3, c4, c5, c6, c7>> when not pad? ->
          unquote(valid_char_name)(c1) and unquote(valid_char_name)(c2) and
            unquote(valid_char_name)(c3) and unquote(valid_char_name)(c4) and
            unquote(valid_char_name)(c5) and unquote(valid_char_name)(c6) and
            unquote(valid_char_name)(c7)

        _ ->
          false
      end
    end

    # Per-byte predicate backed by the compile-time lookup tuple; a failing
    # `elem/2` in the guard falls through to the `false` clause.
    @compile {:inline, [{valid_char_name, 1}]}
    defp unquote(valid_char_name)(char)
         when elem({unquote_splicing(decoded)}, char - unquote(min)) != nil,
         do: true

    defp unquote(valid_char_name)(_char), do: false
  end

  # --- base64 (mirrors lib/elixir/lib/base.ex valid64?/url_valid64? machinery)

  b64_alphabet = ~c"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
  b64url_alphabet = ~c"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"

  # With `ignore: nil` the input is used as is; with `ignore: :whitespace`
  # space, tab, CR and LF bytes are dropped before validation.
  defp remove_ignored(string, nil), do: string

  defp remove_ignored(string, :whitespace) do
    for <<char::8 <- string>>, char not in ~c"\s\t\r\n", into: <<>>, do: <<char::8>>
  end

  # Checks whether `string` is valid standard base64. Options: `:padding`
  # (true by default) and `:ignore` (`:whitespace` or unset).
  def valid64?(string, opts \\ []) when is_binary(string) do
    pad? = Keyword.get(opts, :padding, true)
    string |> remove_ignored(opts[:ignore]) |> validate64base?(pad?)
  end

  # Same as valid64?/2 but for the URL-safe alphabet.
  def url_valid64?(string, opts \\ []) when is_binary(string) do
    pad? = Keyword.get(opts, :padding, true)
    string |> remove_ignored(opts[:ignore]) |> validate64url?(pad?)
  end

  # Generates the entry validator, the 8-byte main loop and the per-byte
  # predicate for the standard and the URL-safe alphabet.
  for {base, alphabet} <- [base: b64_alphabet, url: b64url_alphabet] do
    validate_name = :"validate64#{base}?"
    validate_main_name = :"validate_main64#{base}?"
    valid_char_name = :"valid_char64#{base}?"
    {min, decoded} = alphabet |> Enum.with_index() |> to_decode_list.()

    defp unquote(validate_main_name)(<<>>), do: true

    defp unquote(validate_main_name)(<<c1, c2, c3, c4, c5, c6, c7, c8, rest::binary>>) do
      unquote(valid_char_name)(c1) and
        unquote(valid_char_name)(c2) and
        unquote(valid_char_name)(c3) and
        unquote(valid_char_name)(c4) and
        unquote(valid_char_name)(c5) and
        unquote(valid_char_name)(c6) and
        unquote(valid_char_name)(c7) and
        unquote(valid_char_name)(c8) and
        unquote(validate_main_name)(rest)
    end

    defp unquote(validate_name)(<<>>, _pad?), do: true

    defp unquote(validate_name)(string, pad?) do
      # Same split as in base32: `main` is every 8-byte group before the last
      # one, `rest` is the final 1 to 8 bytes.
      segs = div(byte_size(string) + 7, 8) - 1
      <<main::size(^segs)-binary-unit(64), rest::binary>> = string
      main_valid? = unquote(validate_main_name)(main)

      # `rest` may hold one quantum (4 bytes) or two (8 bytes); padding is
      # only accepted at the very end. Shorter unpadded tails are accepted
      # only when padding is not required.
      case rest do
        _ when not main_valid? ->
          false

        <<c1, c2, ?=, ?=>> ->
          unquote(valid_char_name)(c1) and unquote(valid_char_name)(c2)

        <<c1, c2, c3, ?=>> ->
          unquote(valid_char_name)(c1) and unquote(valid_char_name)(c2) and
            unquote(valid_char_name)(c3)

        <<c1, c2, c3, c4>> ->
          unquote(valid_char_name)(c1) and unquote(valid_char_name)(c2) and
            unquote(valid_char_name)(c3) and unquote(valid_char_name)(c4)

        <<c1, c2, c3, c4, c5, c6, ?=, ?=>> ->
          unquote(valid_char_name)(c1) and unquote(valid_char_name)(c2) and
            unquote(valid_char_name)(c3) and unquote(valid_char_name)(c4) and
            unquote(valid_char_name)(c5) and unquote(valid_char_name)(c6)

        <<c1, c2, c3, c4, c5, c6, c7, ?=>> ->
          unquote(valid_char_name)(c1) and unquote(valid_char_name)(c2) and
            unquote(valid_char_name)(c3) and unquote(valid_char_name)(c4) and
            unquote(valid_char_name)(c5) and unquote(valid_char_name)(c6) and
            unquote(valid_char_name)(c7)

        <<c1, c2, c3, c4, c5, c6, c7, c8>> ->
          unquote(valid_char_name)(c1) and unquote(valid_char_name)(c2) and
            unquote(valid_char_name)(c3) and unquote(valid_char_name)(c4) and
            unquote(valid_char_name)(c5) and unquote(valid_char_name)(c6) and
            unquote(valid_char_name)(c7) and unquote(valid_char_name)(c8)

        <<c1, c2>> when not pad? ->
          unquote(valid_char_name)(c1) and unquote(valid_char_name)(c2)

        <<c1, c2, c3>> when not pad? ->
          unquote(valid_char_name)(c1) and unquote(valid_char_name)(c2) and
            unquote(valid_char_name)(c3)

        <<c1, c2, c3, c4, c5, c6>> when not pad? ->
          unquote(valid_char_name)(c1) and unquote(valid_char_name)(c2) and
            unquote(valid_char_name)(c3) and unquote(valid_char_name)(c4) and
            unquote(valid_char_name)(c5) and unquote(valid_char_name)(c6)

        <<c1, c2, c3, c4, c5, c6, c7>> when not pad? ->
          unquote(valid_char_name)(c1) and unquote(valid_char_name)(c2) and
            unquote(valid_char_name)(c3) and unquote(valid_char_name)(c4) and
            unquote(valid_char_name)(c5) and unquote(valid_char_name)(c6) and
            unquote(valid_char_name)(c7)

        _ ->
          false
      end
    end

    # Per-byte predicate backed by the compile-time lookup tuple; a failing
    # `elem/2` in the guard falls through to the `false` clause.
    @compile {:inline, [{valid_char_name, 1}]}
    defp unquote(valid_char_name)(char)
         when elem({unquote_splicing(decoded)}, char - unquote(min)) != nil,
         do: true

    defp unquote(valid_char_name)(_char), do: false
  end
end

defmodule Optimised do
  @moduledoc false
  import Bitwise

  # Constants for the SWAR-optimised validators in this module.
  #
  # Words are 56 bits wide, i.e. seven byte lanes: that is the widest
  # whole-byte word that still fits in a BEAM small int (fixnum range is
  # 59-bit signed on 64-bit OTP). At 64 bits, every `w + 0x80..` would
  # allocate a bignum on the heap and the optimisation collapses.
  # See https://github.com/erlang/otp/pull/10938.
  # NOTE(review): the PR description cites erlang/otp#10948 for the same
  # work - confirm which number is the right one.
  #
  # MASK80 has the top bit of each of the seven lanes set.
  @swar_mask80 0x80808080808080

  # Per-range SWAR constants, broadcast across 7 lanes. Naming convention:
  #   @swar_ge_X = 0x80 - X  → (w + @swar_ge_X) has high bit set iff byte ≥ X
  #   @swar_gt_X = 0x7F - X  → (w + @swar_gt_X) has high bit set iff byte > X
  # A byte is in range [lo, hi] iff (bxor(w + @swar_ge_lo, w + @swar_gt_hi))
  # has its high bit set. This only holds when every byte of `w` is below
  # 0x80; otherwise an addition can carry into the neighbouring lane.
  @swar_ge_0 0x50505050505050
  @swar_gt_9 0x46464646464646
  @swar_ge_2 0x4E4E4E4E4E4E4E
  @swar_gt_7 0x48484848484848
  @swar_ge_A 0x3F3F3F3F3F3F3F
  @swar_gt_F 0x39393939393939
  @swar_gt_V 0x29292929292929
  @swar_gt_Z 0x25252525252525
  @swar_ge_a 0x1F1F1F1F1F1F1F
  @swar_gt_f 0x19191919191919
  @swar_gt_v 0x09090909090909
  @swar_gt_z 0x05050505050505

  # Constants for detecting the base64 singleton bytes (+, -, _).
  # `bxor(w, K*ones)` turns every lane that equals K into 0x00, so finding K
  # becomes finding a zero lane (Mycroft-style zero-byte detection).
  #
  # CAUTION: the shortcut `bxor(w, K*ones) - 0x01..01` is NOT exact per lane.
  # The subtraction borrows out of a zero lane into the next more significant
  # lane, i.e. into the byte that comes just before it in the input. If that
  # byte equals `K xor 0x01` its lane is flagged as well: '*' directly before
  # '+', ',' directly before '-' and '^' directly before '_'. Those bytes are
  # outside the base64 alphabets, so a validator that ORs the shortcut into
  # its "lane is valid" mask accepts them. Any per-lane use therefore needs a
  # test that cannot borrow or carry across lanes.
  # Pattern follows https://github.com/elixir-lang/elixir/pull/15255.
  @swar_mask01 0x01010101010101
  @swar_plus_x7 0x2B2B2B2B2B2B2B
  @swar_dash_x7 0x2D2D2D2D2D2D2D
  @swar_under_x7 0x5F5F5F5F5F5F5F

  # For base64 standard, '/' (0x2F) sits exactly one below '0' (0x30), so we
  # extend the digit range to [0x2F, 0x39] which catches '/' as part of one
  # range check — saves one singleton test. Trick lifted from
  # https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
  # (0x51 = 0x80 - 0x2F, following the @swar_ge_X convention above.)
  @swar_ge_slash 0x51515151515151

  # Per-byte validity. One guard per (encoding, case). Used in the SWAR
  # clauses for the 8th byte of the stride, and in the body of tail clauses
  # (guards defined with defguardp can also be called as plain expressions).
  defguardp valid_char16upper?(c) when c in ?0..?9 or c in ?A..?F
  defguardp valid_char16lower?(c) when c in ?0..?9 or c in ?a..?f
  defguardp valid_char16mixed?(c) when c in ?0..?9 or c in ?A..?F or c in ?a..?f

  defguardp valid_char32upper?(c) when c in ?A..?Z or c in ?2..?7
  defguardp valid_char32lower?(c) when c in ?a..?z or c in ?2..?7
  defguardp valid_char32mixed?(c) when c in ?A..?Z or c in ?a..?z or c in ?2..?7

  # Most common range first — for short-circuit OR, this minimises avg
  # comparisons in the 8th-byte SWAR check + per-byte tail. In hex base32,
  # letters dominate (22/32) over digits (10/32), so letters go first.
  defguardp valid_char32hexupper?(c) when c in ?A..?V or c in ?0..?9
  defguardp valid_char32hexlower?(c) when c in ?a..?v or c in ?0..?9
  defguardp valid_char32hexmixed?(c) when c in ?A..?V or c in ?a..?v or c in ?0..?9

  # base64 alphabets have 3 ranges (A-Z, a-z, 0-9) + 2 singletons. The
  # word-level guards (valid_word64base?/valid_word64url?) test the singletons
  # too, so these per-byte guards are only needed for the 8th byte of each
  # stride and for the tail. Order of the checks, most common first:
  # letters (52/64, ~81%), digits (10/64, ~16%), singletons (2/64, ~3%).
  defguardp valid_char64base?(c)
            when c in ?A..?Z or c in ?a..?z or c in ?0..?9 or c == ?+ or c == ?/

  defguardp valid_char64url?(c)
            when c in ?A..?Z or c in ?a..?z or c in ?0..?9 or c == ?- or c == ?_

  # Whole-word validity: one guard per (encoding, case), each checking seven
  # bytes packed into the 56-bit integer `w`.
  #
  # How every guard is put together:
  #   1. ASCII gate, `(w &&& MASK80) == 0`:
  #      no lane has its top bit set, so the per-lane additions that follow
  #      can never carry into a neighbouring lane.
  #
  #   2. Membership gate, "every lane is in range A or B (or C)":
  #      for an accepted range [lo, hi] the expression
  #          bxor(w + @swar_ge_lo, w + @swar_gt_hi)
  #      has the top bit of a lane set exactly when that lane's byte lies in
  #      [lo, hi]. The per-range results are OR-ed, masked with MASK80, and
  #      all seven top bits are required to be set.

  # base16, upper case: 0-9 or A-F.
  defguardp valid_word16upper?(w)
            when (w &&& @swar_mask80) == 0 and
                   ((bxor(w + @swar_ge_0, w + @swar_gt_9) |||
                       bxor(w + @swar_ge_A, w + @swar_gt_F)) &&&
                      @swar_mask80) == @swar_mask80

  # base16, lower case: 0-9 or a-f.
  defguardp valid_word16lower?(w)
            when (w &&& @swar_mask80) == 0 and
                   ((bxor(w + @swar_ge_0, w + @swar_gt_9) |||
                       bxor(w + @swar_ge_a, w + @swar_gt_f)) &&&
                      @swar_mask80) == @swar_mask80

  # base16, either case: 0-9, A-F or a-f.
  defguardp valid_word16mixed?(w)
            when (w &&& @swar_mask80) == 0 and
                   ((bxor(w + @swar_ge_0, w + @swar_gt_9) |||
                       bxor(w + @swar_ge_A, w + @swar_gt_F) |||
                       bxor(w + @swar_ge_a, w + @swar_gt_f)) &&&
                      @swar_mask80) == @swar_mask80

  # base32, upper case: A-Z or 2-7. Same two-step shape as the base16 word
  # guards: ASCII gate first, then "every lane is in one of the ranges".
  defguardp valid_word32upper?(w)
            when (w &&& @swar_mask80) == 0 and
                   ((bxor(w + @swar_ge_A, w + @swar_gt_Z) |||
                       bxor(w + @swar_ge_2, w + @swar_gt_7)) &&&
                      @swar_mask80) == @swar_mask80

  # base32, lower case: a-z or 2-7.
  defguardp valid_word32lower?(w)
            when (w &&& @swar_mask80) == 0 and
                   ((bxor(w + @swar_ge_a, w + @swar_gt_z) |||
                       bxor(w + @swar_ge_2, w + @swar_gt_7)) &&&
                      @swar_mask80) == @swar_mask80

  # base32, either case: A-Z, a-z or 2-7.
  defguardp valid_word32mixed?(w)
            when (w &&& @swar_mask80) == 0 and
                   ((bxor(w + @swar_ge_A, w + @swar_gt_Z) |||
                       bxor(w + @swar_ge_a, w + @swar_gt_z) |||
                       bxor(w + @swar_ge_2, w + @swar_gt_7)) &&&
                      @swar_mask80) == @swar_mask80

  # Extended-hex base32, upper case: 0-9 or A-V. Same two-step shape as the
  # other word guards: ASCII gate first, then "every lane is in a range".
  defguardp valid_word32hexupper?(w)
            when (w &&& @swar_mask80) == 0 and
                   ((bxor(w + @swar_ge_0, w + @swar_gt_9) |||
                       bxor(w + @swar_ge_A, w + @swar_gt_V)) &&&
                      @swar_mask80) == @swar_mask80

  # Extended-hex base32, lower case: 0-9 or a-v.
  defguardp valid_word32hexlower?(w)
            when (w &&& @swar_mask80) == 0 and
                   ((bxor(w + @swar_ge_0, w + @swar_gt_9) |||
                       bxor(w + @swar_ge_a, w + @swar_gt_v)) &&&
                      @swar_mask80) == @swar_mask80

  # Extended-hex base32, either case: 0-9, A-V or a-v.
  defguardp valid_word32hexmixed?(w)
            when (w &&& @swar_mask80) == 0 and
                   ((bxor(w + @swar_ge_0, w + @swar_gt_9) |||
                       bxor(w + @swar_ge_A, w + @swar_gt_V) |||
                       bxor(w + @swar_ge_a, w + @swar_gt_v)) &&&
                      @swar_mask80) == @swar_mask80

  # SWAR for base64 standard: three ranges OR'd with one singleton test for
  # '+'. The digit range is extended to [0x2F, 0x39] so it absorbs '/'
  # (0x2F) — Lemire-style range/singleton merge.
  #
  # The '+' test has to be exact per lane. `v = bxor(w, '+' * ones)` is 0x00
  # in a lane iff that byte is '+', and because of the ASCII gate every lane
  # of `v` is at most 0x7F. Adding 0x7F to each lane (written below as
  # `@swar_mask80 - @swar_mask01`, a compile-time constant) therefore never
  # carries and sets the lane's top bit iff `v != 0`; the final bxor with
  # MASK80 flips that into "top bit set iff the byte is '+'".
  #
  # The shorter `v - 0x01..01` form must not be used here: its borrow out of
  # a '+' lane ripples into the preceding byte's lane and flags '*' (0x2A)
  # there, so an input such as "AAAAA*+A..." would be reported as valid.
  defguardp valid_word64base?(w)
            when band(w, @swar_mask80) == 0 and
                   band(
                     bor(
                       bor(
                         bor(
                           bxor(w + @swar_ge_A, w + @swar_gt_Z),
                           bxor(w + @swar_ge_a, w + @swar_gt_z)
                         ),
                         bxor(w + @swar_ge_slash, w + @swar_gt_9)
                       ),
                       bxor(
                         bxor(w, @swar_plus_x7) + (@swar_mask80 - @swar_mask01),
                         @swar_mask80
                       )
                     ),
                     @swar_mask80
                   ) == @swar_mask80

  # SWAR for URL-safe base64: three ranges (A-Z, a-z, 0-9) OR'd with two
  # singleton tests, one for '-' and one for '_'.
  #
  # Each singleton test has to be exact per lane. `v = bxor(w, K * ones)` is
  # 0x00 in a lane iff that byte is K, and because of the ASCII gate every
  # lane of `v` is at most 0x7F. Adding 0x7F to each lane (written below as
  # `@swar_mask80 - @swar_mask01`, a compile-time constant) never carries and
  # sets the lane's top bit iff `v != 0`; the bxor with MASK80 flips that
  # into "top bit set iff the byte is K".
  #
  # The shorter `v - 0x01..01` form must not be used here: its borrow out of
  # a matching lane ripples into the preceding byte's lane, so ',' directly
  # before '-' or '^' directly before '_' would be reported as valid.
  defguardp valid_word64url?(w)
            when band(w, @swar_mask80) == 0 and
                   band(
                     bor(
                       bor(
                         bor(
                           bxor(w + @swar_ge_A, w + @swar_gt_Z),
                           bxor(w + @swar_ge_a, w + @swar_gt_z)
                         ),
                         bxor(w + @swar_ge_0, w + @swar_gt_9)
                       ),
                       bor(
                         bxor(
                           bxor(w, @swar_dash_x7) + (@swar_mask80 - @swar_mask01),
                           @swar_mask80
                         ),
                         bxor(
                           bxor(w, @swar_under_x7) + (@swar_mask80 - @swar_mask01),
                           @swar_mask80
                         )
                       )
                     ),
                     @swar_mask80
                   ) == @swar_mask80

  # =========================================================================
  # base16
  # =========================================================================

  # Checks whether `string` is valid base16. Binaries with an odd byte size
  # are rejected straight away; otherwise the `:case` option (`:upper` when
  # not given, `:lower` or `:mixed`) picks the validator to run.
  def valid16?(string, opts \\ [])

  def valid16?(string, opts) when is_binary(string) do
    if rem(byte_size(string), 2) == 0 do
      case Keyword.get(opts, :case, :upper) do
        :upper -> validate16upper?(string)
        :lower -> validate16lower?(string)
        :mixed -> validate16mixed?(string)
      end
    else
      false
    end
  end

  # --- :upper / :lower / :mixed -------------------------------------------
  #
  # The three case variants differ only in the guards they use, so they are
  # generated from one template, the same way the base32 and base64
  # validators in this module are. For each variant this defines
  # `validate16<case>?/1` with the clauses in this order:
  #
  #   1. 8+ bytes left and the stride is valid: seven bytes are checked as
  #      one 56-bit word, the eighth on its own, then recurse on the rest.
  #   2. 8+ bytes left but clause 1 did not match: the stride is invalid.
  #   3. 4+ bytes left (at most 7 here): four per-byte checks, then recurse.
  #   4. 2+ bytes left: two per-byte checks, then recurse.
  #   5. nothing left: valid.
  #   6. anything else (a single leftover byte): invalid.
  for {case_name, char_guard, word_guard} <- [
        {:upper, :valid_char16upper?, :valid_word16upper?},
        {:lower, :valid_char16lower?, :valid_word16lower?},
        {:mixed, :valid_char16mixed?, :valid_word16mixed?}
      ] do
    validate_name = :"validate16#{case_name}?"

    defp unquote(validate_name)(<<w::56, b, rest::binary>>)
         when unquote(word_guard)(w) and unquote(char_guard)(b),
         do: unquote(validate_name)(rest)

    defp unquote(validate_name)(<<_::56, _, _::binary>>), do: false

    defp unquote(validate_name)(<<c1, c2, c3, c4, rest::binary>>) do
      unquote(char_guard)(c1) and
        unquote(char_guard)(c2) and
        unquote(char_guard)(c3) and
        unquote(char_guard)(c4) and
        unquote(validate_name)(rest)
    end

    defp unquote(validate_name)(<<c1, c2, rest::binary>>) do
      unquote(char_guard)(c1) and
        unquote(char_guard)(c2) and
        unquote(validate_name)(rest)
    end

    defp unquote(validate_name)(<<>>), do: true
    defp unquote(validate_name)(_), do: false
  end

  # =========================================================================
  # base32
  # =========================================================================

  # Checks whether `string` is valid base32 (regular alphabet). Options:
  # `:padding` (true when not given) and `:case` (`:upper` when not given,
  # `:lower` or `:mixed`).
  def valid32?(string, opts \\ []) when is_binary(string) do
    padding = Keyword.get(opts, :padding, true)
    variant = Keyword.get(opts, :case, :upper)

    case variant do
      :upper -> validate32upper?(string, padding)
      :lower -> validate32lower?(string, padding)
      :mixed -> validate32mixed?(string, padding)
    end
  end

  # Checks whether `string` is valid base32 in the extended-hex alphabet.
  # Options: `:padding` (true when not given) and `:case` (`:upper` when not
  # given, `:lower` or `:mixed`).
  def hex_valid32?(string, opts \\ []) when is_binary(string) do
    padding = Keyword.get(opts, :padding, true)
    variant = Keyword.get(opts, :case, :upper)

    case variant do
      :upper -> validate32hexupper?(string, padding)
      :lower -> validate32hexlower?(string, padding)
      :mixed -> validate32hexmixed?(string, padding)
    end
  end

  # Base32 validation, generated once per alphabet/case pair. As in Baseline,
  # the input is cut into a `body` made of whole 8-byte groups and a `final`
  # group of 1-8 bytes. Only `body` goes through the SWAR loop; `final`, the
  # only place where padding may appear, is matched byte by byte. The regular
  # alphabet (`:upper`/`:lower`/`:mixed`) and the extended-hex alphabet
  # (`:hexupper`/`:hexlower`/`:hexmixed`) share this code.
  for {case_name, char_guard, word_guard} <- [
        {:upper, :valid_char32upper?, :valid_word32upper?},
        {:lower, :valid_char32lower?, :valid_word32lower?},
        {:mixed, :valid_char32mixed?, :valid_word32mixed?},
        {:hexupper, :valid_char32hexupper?, :valid_word32hexupper?},
        {:hexlower, :valid_char32hexlower?, :valid_word32hexlower?},
        {:hexmixed, :valid_char32hexmixed?, :valid_word32hexmixed?}
      ] do
    entry = :"validate32#{case_name}?"
    main_loop = :"validate_main32#{case_name}?"

    # SWAR loop over whole 8-byte groups: seven bytes are checked as one
    # 56-bit word, the eighth byte on its own.
    defp unquote(main_loop)(<<>>), do: true

    defp unquote(main_loop)(<<word::56, eighth, more::binary>>)
         when unquote(word_guard)(word) and unquote(char_guard)(eighth),
         do: unquote(main_loop)(more)

    defp unquote(main_loop)(<<_::56, _, _::binary>>), do: false

    defp unquote(entry)(<<>>, _pad?), do: true

    defp unquote(entry)(string, pad?) do
      # Number of 8-byte groups that come before the final 1-8 byte group.
      leading = div(byte_size(string) + 7, 8) - 1
      <<body::size(^leading)-binary-unit(64), final::binary>> = string

      if unquote(main_loop)(body) do
        # Accepted layouts for the final group: the padded 8-byte forms, a
        # full unpadded group and, only when padding is not required, the
        # shorter unpadded forms.
        case final do
          <<x1, x2, ?=, ?=, ?=, ?=, ?=, ?=>> ->
            unquote(char_guard)(x1) and unquote(char_guard)(x2)

          <<x1, x2, x3, x4, ?=, ?=, ?=, ?=>> ->
            unquote(char_guard)(x1) and unquote(char_guard)(x2) and
              unquote(char_guard)(x3) and unquote(char_guard)(x4)

          <<x1, x2, x3, x4, x5, ?=, ?=, ?=>> ->
            unquote(char_guard)(x1) and unquote(char_guard)(x2) and
              unquote(char_guard)(x3) and unquote(char_guard)(x4) and
              unquote(char_guard)(x5)

          <<x1, x2, x3, x4, x5, x6, x7, ?=>> ->
            unquote(char_guard)(x1) and unquote(char_guard)(x2) and
              unquote(char_guard)(x3) and unquote(char_guard)(x4) and
              unquote(char_guard)(x5) and unquote(char_guard)(x6) and
              unquote(char_guard)(x7)

          <<x1, x2, x3, x4, x5, x6, x7, x8>> ->
            unquote(char_guard)(x1) and unquote(char_guard)(x2) and
              unquote(char_guard)(x3) and unquote(char_guard)(x4) and
              unquote(char_guard)(x5) and unquote(char_guard)(x6) and
              unquote(char_guard)(x7) and unquote(char_guard)(x8)

          <<x1, x2>> when not pad? ->
            unquote(char_guard)(x1) and unquote(char_guard)(x2)

          <<x1, x2, x3, x4>> when not pad? ->
            unquote(char_guard)(x1) and unquote(char_guard)(x2) and
              unquote(char_guard)(x3) and unquote(char_guard)(x4)

          <<x1, x2, x3, x4, x5>> when not pad? ->
            unquote(char_guard)(x1) and unquote(char_guard)(x2) and
              unquote(char_guard)(x3) and unquote(char_guard)(x4) and
              unquote(char_guard)(x5)

          <<x1, x2, x3, x4, x5, x6, x7>> when not pad? ->
            unquote(char_guard)(x1) and unquote(char_guard)(x2) and
              unquote(char_guard)(x3) and unquote(char_guard)(x4) and
              unquote(char_guard)(x5) and unquote(char_guard)(x6) and
              unquote(char_guard)(x7)

          _ ->
            false
        end
      else
        false
      end
    end
  end

  # =========================================================================
  # base64
  # =========================================================================

  # Pre-processing for the `:ignore` option: `nil` leaves the input alone,
  # `:whitespace` drops every space, tab, carriage-return and line-feed byte.
  defp remove_ignored(string, nil), do: string

  defp remove_ignored(string, :whitespace) do
    for <<byte <- string>>, byte not in [?\s, ?\t, ?\r, ?\n], into: <<>> do
      <<byte>>
    end
  end

  # Checks whether `string` is valid standard base64. Options: `:padding`
  # (true when not given) and `:ignore` (`:whitespace`, or unset to keep the
  # input as is).
  def valid64?(string, opts \\ []) when is_binary(string) do
    padding = Keyword.get(opts, :padding, true)
    cleaned = remove_ignored(string, opts[:ignore])
    validate64base?(cleaned, padding)
  end

  # Checks whether `string` is valid URL-safe base64. Options: `:padding`
  # (true when not given) and `:ignore` (`:whitespace`, or unset to keep the
  # input as is).
  def url_valid64?(string, opts \\ []) when is_binary(string) do
    padding = Keyword.get(opts, :padding, true)
    cleaned = remove_ignored(string, opts[:ignore])
    validate64url?(cleaned, padding)
  end

  # Same dispatch shape as base32: split into `main` (multiple of 8 bytes) and
  # `rest` (≤8 bytes containing padding). SWAR includes singletons via
  # Mycroft, so no per-byte fallback is needed in the main loop.
  #
  # For each alphabet variant this generates a `validate64<variant>?/2` entry
  # point and a `validate_main64<variant>?/1` loop. `char_guard` and
  # `word_guard` name guards that are defined elsewhere in this module.
  for {variant, char_guard, word_guard} <- [
        {:base, :valid_char64base?, :valid_word64base?},
        {:url, :valid_char64url?, :valid_word64url?}
      ] do
    validate_name = :"validate64#{variant}?"
    validate_main_name = :"validate_main64#{variant}?"

    # The empty string is valid whether or not padding is required.
    defp unquote(validate_name)(<<>>, _pad?), do: true

    defp unquote(validate_name)(string, pad?) do
      # Number of 8-byte segments that precede the final one. Rounding up and
      # then subtracting one leaves 1..8 bytes in `rest` for non-empty input.
      segs = div(byte_size(string) + 7, 8) - 1
      <<main::size(^segs)-binary-unit(64), rest::binary>> = string
      # `main` is validated eagerly, before `rest` is inspected.
      main_valid? = unquote(validate_main_name)(main)

      case rest do
        # A bad segment in `main` rejects the input whatever `rest` holds.
        _ when not main_valid? ->
          false

        # Tails of 4 or 8 bytes (explicitly padded or complete) are accepted
        # regardless of `pad?`; only the non-`=` bytes are checked.
        <<c1, c2, ?=, ?=>> ->
          unquote(char_guard)(c1) and unquote(char_guard)(c2)

        <<c1, c2, c3, ?=>> ->
          unquote(char_guard)(c1) and unquote(char_guard)(c2) and unquote(char_guard)(c3)

        <<c1, c2, c3, c4>> ->
          unquote(char_guard)(c1) and unquote(char_guard)(c2) and
            unquote(char_guard)(c3) and unquote(char_guard)(c4)

        <<c1, c2, c3, c4, c5, c6, ?=, ?=>> ->
          unquote(char_guard)(c1) and unquote(char_guard)(c2) and
            unquote(char_guard)(c3) and unquote(char_guard)(c4) and
            unquote(char_guard)(c5) and unquote(char_guard)(c6)

        <<c1, c2, c3, c4, c5, c6, c7, ?=>> ->
          unquote(char_guard)(c1) and unquote(char_guard)(c2) and
            unquote(char_guard)(c3) and unquote(char_guard)(c4) and
            unquote(char_guard)(c5) and unquote(char_guard)(c6) and
            unquote(char_guard)(c7)

        <<c1, c2, c3, c4, c5, c6, c7, c8>> ->
          unquote(char_guard)(c1) and unquote(char_guard)(c2) and
            unquote(char_guard)(c3) and unquote(char_guard)(c4) and
            unquote(char_guard)(c5) and unquote(char_guard)(c6) and
            unquote(char_guard)(c7) and unquote(char_guard)(c8)

        # Unpadded tails of 2, 3, 6 or 7 bytes are accepted only when padding
        # is not required (`pad?` is false).
        <<c1, c2>> when not pad? ->
          unquote(char_guard)(c1) and unquote(char_guard)(c2)

        <<c1, c2, c3>> when not pad? ->
          unquote(char_guard)(c1) and unquote(char_guard)(c2) and unquote(char_guard)(c3)

        <<c1, c2, c3, c4, c5, c6>> when not pad? ->
          unquote(char_guard)(c1) and unquote(char_guard)(c2) and
            unquote(char_guard)(c3) and unquote(char_guard)(c4) and
            unquote(char_guard)(c5) and unquote(char_guard)(c6)

        <<c1, c2, c3, c4, c5, c6, c7>> when not pad? ->
          unquote(char_guard)(c1) and unquote(char_guard)(c2) and
            unquote(char_guard)(c3) and unquote(char_guard)(c4) and
            unquote(char_guard)(c5) and unquote(char_guard)(c6) and
            unquote(char_guard)(c7)

        # Everything else is invalid: 1- and 5-byte tails, and the unpadded
        # tails above when `pad?` is true.
        _ ->
          false
      end
    end

    # Nothing left to check: either `main` was empty or the loop consumed it.
    defp unquote(validate_main_name)(<<>>), do: true

    # Consume one 8-byte segment: the first 7 bytes are read as a single 56-bit
    # integer and checked by `word_guard`; the 8th byte is checked on its own
    # by `char_guard`.
    defp unquote(validate_main_name)(<<w::56, b, rest::binary>>)
         when unquote(word_guard)(w) and unquote(char_guard)(b),
         do: unquote(validate_main_name)(rest)

    # A segment that failed either guard makes the whole input invalid.
    defp unquote(validate_main_name)(<<_::56, _, _::binary>>), do: false
  end
end

# Benchmark payload sizes as `{label, byte_count}` pairs; the byte count is the
# size of the raw (pre-encoding) random data.
sizes =
  for {label, kib} <- [{"1KiB", 1}, {"100KiB", 100}, {"1MiB", 1024}] do
    {label, kib * 1024}
  end

# Benchmark inputs for valid16?. Every value is a `{binary, case_opt}` tuple, so
# the Baseline and Optimised jobs receive identical arguments and Benchee can
# report them side by side for each input row.
inputs =
  sizes
  |> Enum.flat_map(fn {size_label, n} ->
    data = :crypto.strong_rand_bytes(n)
    upper = Base.encode16(data)
    lower = Base.encode16(data, case: :lower)
    len = byte_size(upper)

    # Mixed case: lowercase the hex letters that sit at odd byte offsets.
    mixed =
      for {c, i} <- Enum.with_index(:binary.bin_to_list(upper)), into: <<>> do
        if c in ?A..?F and rem(i, 2) == 1, do: <<c - ?A + ?a>>, else: <<c>>
      end

    # Rejection cases are built from the uppercase encoding only; "Z" is
    # outside the hex alphabet whatever the case option.
    invalid_at_end = binary_part(upper, 0, len - 1) <> "Z"
    mid = div(len, 2)

    invalid_at_mid =
      binary_part(upper, 0, mid) <> "Z" <> binary_part(upper, mid + 1, len - mid - 1)

    [
      {"#{size_label} upper valid", {upper, :upper}},
      {"#{size_label} lower valid", {lower, :lower}},
      {"#{size_label} mixed valid", {mixed, :mixed}},
      {"#{size_label} upper invalid@end", {invalid_at_end, :upper}},
      {"#{size_label} upper invalid@mid", {invalid_at_mid, :upper}}
    ]
  end)
  |> Map.new()

# Sanity-check Baseline and Optimised against the stdlib Base for every input.
# All three results are computed first; the script aborts on the first input
# where either local implementation disagrees with `Base.valid16?/2`.
# The failure messages name the function under test so a mismatch can be
# attributed without reading the stack trace.
for {label, {bin, case_opt}} <- inputs do
  ref = Base.valid16?(bin, case: case_opt)
  b = Baseline.valid16?(bin, case: case_opt)
  o = Optimised.valid16?(bin, case: case_opt)

  ref === b ||
    raise "Baseline valid16? mismatch on #{label}: stdlib=#{ref} baseline=#{b}"

  ref === o ||
    raise "Optimised valid16? mismatch on #{label}: stdlib=#{ref} optimised=#{o}"
end

IO.puts("# valid16? equivalence check passed.\n")

# --- base32 inputs ------------------------------------------------------

# Benchmark inputs for valid32?, keyed by label; every value is a
# `{binary, case_opt}` tuple.
inputs32 =
  sizes
  |> Enum.flat_map(fn {size_label, n} ->
    data = :crypto.strong_rand_bytes(n)
    upper = Base.encode32(data)
    lower = Base.encode32(data, case: :lower)
    len = byte_size(upper)

    # Mixed case: lowercase the letters that sit at odd byte offsets.
    mixed =
      for {c, i} <- Enum.with_index(:binary.bin_to_list(upper)), into: <<>> do
        if c in ?A..?Z and rem(i, 2) == 1, do: <<c - ?A + ?a>>, else: <<c>>
      end

    # Rejection cases: "!" (0x21) is not part of any base32 alphabet. The
    # overwritten final byte may be a `=` pad byte; the input is still meant
    # to be rejected.
    invalid_at_end = binary_part(upper, 0, len - 1) <> "!"
    mid = div(len, 2)

    invalid_at_mid =
      binary_part(upper, 0, mid) <> "!" <> binary_part(upper, mid + 1, len - mid - 1)

    [
      {"#{size_label} upper valid", {upper, :upper}},
      {"#{size_label} lower valid", {lower, :lower}},
      {"#{size_label} mixed valid", {mixed, :mixed}},
      {"#{size_label} upper invalid@end", {invalid_at_end, :upper}},
      {"#{size_label} upper invalid@mid", {invalid_at_mid, :upper}}
    ]
  end)
  |> Map.new()

# Abort unless Baseline and Optimised both agree with the stdlib
# `Base.valid32?/2` on every benchmark input.
for {label, {bin, case_opt}} <- inputs32 do
  expected = Base.valid32?(bin, case: case_opt)
  baseline = Baseline.valid32?(bin, case: case_opt)
  optimised = Optimised.valid32?(bin, case: case_opt)

  if expected !== baseline do
    raise "Baseline valid32? mismatch on #{label}: stdlib=#{expected} baseline=#{baseline}"
  end

  if expected !== optimised do
    raise "Optimised valid32? mismatch on #{label}: stdlib=#{expected} optimised=#{optimised}"
  end
end

IO.puts("# valid32? equivalence check passed.\n")

# --- base32 hex inputs --------------------------------------------------

# Benchmark inputs for hex_valid32?, keyed by label; every value is a
# `{binary, case_opt}` tuple.
inputs32hex =
  sizes
  |> Enum.flat_map(fn {size_label, n} ->
    data = :crypto.strong_rand_bytes(n)
    upper = Base.hex_encode32(data)
    lower = Base.hex_encode32(data, case: :lower)
    len = byte_size(upper)

    # Mixed case: lowercase the letters that sit at odd byte offsets.
    mixed =
      for {c, i} <- Enum.with_index(:binary.bin_to_list(upper)), into: <<>> do
        if c in ?A..?V and rem(i, 2) == 1, do: <<c - ?A + ?a>>, else: <<c>>
      end

    # Rejection cases: "!" (0x21) is not part of any base32hex alphabet.
    invalid_at_end = binary_part(upper, 0, len - 1) <> "!"
    mid = div(len, 2)

    invalid_at_mid =
      binary_part(upper, 0, mid) <> "!" <> binary_part(upper, mid + 1, len - mid - 1)

    [
      {"#{size_label} upper valid", {upper, :upper}},
      {"#{size_label} lower valid", {lower, :lower}},
      {"#{size_label} mixed valid", {mixed, :mixed}},
      {"#{size_label} upper invalid@end", {invalid_at_end, :upper}},
      {"#{size_label} upper invalid@mid", {invalid_at_mid, :upper}}
    ]
  end)
  |> Map.new()

# Abort unless Baseline and Optimised both agree with the stdlib
# `Base.hex_valid32?/2` on every benchmark input.
for {label, {bin, case_opt}} <- inputs32hex do
  expected = Base.hex_valid32?(bin, case: case_opt)
  baseline = Baseline.hex_valid32?(bin, case: case_opt)
  optimised = Optimised.hex_valid32?(bin, case: case_opt)

  if expected !== baseline do
    raise "Baseline hex_valid32? mismatch on #{label}: stdlib=#{expected} baseline=#{baseline}"
  end

  if expected !== optimised do
    raise "Optimised hex_valid32? mismatch on #{label}: stdlib=#{expected} optimised=#{optimised}"
  end
end

IO.puts("# hex_valid32? equivalence check passed.\n")

# --- base64 inputs ------------------------------------------------------

# Benchmark inputs for valid64?, keyed by label; every value is the binary to
# validate (no case option applies to base64).
inputs64base =
  sizes
  |> Enum.flat_map(fn {size_label, n} ->
    data = :crypto.strong_rand_bytes(n)
    base = Base.encode64(data)
    len = byte_size(base)

    # Rejection cases: "!" (0x21) is not part of any base64 alphabet.
    invalid_at_end = binary_part(base, 0, len - 1) <> "!"
    mid = div(len, 2)

    invalid_at_mid =
      binary_part(base, 0, mid) <> "!" <> binary_part(base, mid + 1, len - mid - 1)

    [
      {"#{size_label} valid", base},
      {"#{size_label} invalid@end", invalid_at_end},
      {"#{size_label} invalid@mid", invalid_at_mid}
    ]
  end)
  |> Map.new()

# Benchmark inputs for url_valid64?, keyed by label; every value is the binary
# to validate.
inputs64url =
  sizes
  |> Enum.flat_map(fn {size_label, n} ->
    data = :crypto.strong_rand_bytes(n)
    url = Base.url_encode64(data)
    len = byte_size(url)

    # Rejection cases: a "!" overwrites the last byte and the middle byte.
    invalid_at_end = binary_part(url, 0, len - 1) <> "!"
    mid = div(len, 2)

    invalid_at_mid =
      binary_part(url, 0, mid) <> "!" <> binary_part(url, mid + 1, len - mid - 1)

    [
      {"#{size_label} valid", url},
      {"#{size_label} invalid@end", invalid_at_end},
      {"#{size_label} invalid@mid", invalid_at_mid}
    ]
  end)
  |> Map.new()

# Abort unless Baseline and Optimised both agree with the stdlib
# `Base.valid64?/1` on every benchmark input (default options only).
for {label, bin} <- inputs64base do
  expected = Base.valid64?(bin)
  baseline = Baseline.valid64?(bin)
  optimised = Optimised.valid64?(bin)

  if expected !== baseline do
    raise "Baseline valid64? mismatch on #{label}: stdlib=#{expected} baseline=#{baseline}"
  end

  if expected !== optimised do
    raise "Optimised valid64? mismatch on #{label}: stdlib=#{expected} optimised=#{optimised}"
  end
end

# Abort unless Baseline and Optimised both agree with the stdlib
# `Base.url_valid64?/1` on every benchmark input (default options only).
for {label, bin} <- inputs64url do
  expected = Base.url_valid64?(bin)
  baseline = Baseline.url_valid64?(bin)
  optimised = Optimised.url_valid64?(bin)

  if expected !== baseline do
    raise "Baseline url_valid64? mismatch on #{label}: stdlib=#{expected} baseline=#{baseline}"
  end

  if expected !== optimised do
    raise "Optimised url_valid64? mismatch on #{label}: stdlib=#{expected} optimised=#{optimised}"
  end
end

IO.puts("# valid64? / url_valid64? equivalence check passed.\n")

# valid16? benchmark: Baseline vs Optimised over every `{binary, case_opt}` input.
if run?.("valid16") do
  IO.puts("\n========== valid16? benchmark ==========\n")

  jobs = %{
    "Baseline" => fn {bin, case_opt} -> Baseline.valid16?(bin, case: case_opt) end,
    "Optimised" => fn {bin, case_opt} -> Optimised.valid16?(bin, case: case_opt) end
  }

  Benchee.run(jobs,
    inputs: inputs,
    warmup: 2,
    time: 5,
    print: [fast_warning: false, benchmarking: false]
  )
end

# valid32? benchmark: Baseline vs Optimised over every `{binary, case_opt}` input.
if run?.("valid32") do
  IO.puts("\n========== valid32? benchmark ==========\n")

  jobs = %{
    "Baseline" => fn {bin, case_opt} -> Baseline.valid32?(bin, case: case_opt) end,
    "Optimised" => fn {bin, case_opt} -> Optimised.valid32?(bin, case: case_opt) end
  }

  Benchee.run(jobs,
    inputs: inputs32,
    warmup: 2,
    time: 5,
    print: [fast_warning: false, benchmarking: false]
  )
end

# hex_valid32? benchmark: Baseline vs Optimised over every `{binary, case_opt}` input.
if run?.("hex_valid32") do
  IO.puts("\n========== hex_valid32? benchmark ==========\n")

  jobs = %{
    "Baseline" => fn {bin, case_opt} -> Baseline.hex_valid32?(bin, case: case_opt) end,
    "Optimised" => fn {bin, case_opt} -> Optimised.hex_valid32?(bin, case: case_opt) end
  }

  Benchee.run(jobs,
    inputs: inputs32hex,
    warmup: 2,
    time: 5,
    print: [fast_warning: false, benchmarking: false]
  )
end

# valid64? benchmark: Baseline vs Optimised, default options, one binary per input.
if run?.("valid64") do
  IO.puts("\n========== valid64? benchmark ==========\n")

  jobs = %{
    "Baseline" => fn bin -> Baseline.valid64?(bin) end,
    "Optimised" => fn bin -> Optimised.valid64?(bin) end
  }

  Benchee.run(jobs,
    inputs: inputs64base,
    warmup: 2,
    time: 5,
    print: [fast_warning: false, benchmarking: false]
  )
end

# url_valid64? benchmark: Baseline vs Optimised, default options, one binary per input.
if run?.("url_valid64") do
  IO.puts("\n========== url_valid64? benchmark ==========\n")

  jobs = %{
    "Baseline" => fn bin -> Baseline.url_valid64?(bin) end,
    "Optimised" => fn bin -> Optimised.url_valid64?(bin) end
  }

  Benchee.run(jobs,
    inputs: inputs64url,
    warmup: 2,
    time: 5,
    print: [fast_warning: false, benchmarking: false]
  )
end

@PJUllrich
Copy link
Copy Markdown
Contributor Author

Results
# valid16? equivalence check passed.

# valid32? equivalence check passed.

# hex_valid32? equivalence check passed.

# valid64? / url_valid64? equivalence check passed.


========== valid16? benchmark ==========

Operating System: macOS
CPU Information: Apple M2 Pro
Number of Available Cores: 10
Available memory: 32 GB
Elixir 1.19.4
Erlang 28.2
JIT enabled: true

Benchmark suite executing with the following configuration:
warmup: 2 s
time: 5 s
memory time: 0 ns
reduction time: 0 ns
parallel: 1
inputs: 100KiB lower valid, 100KiB mixed valid, 100KiB upper invalid@end, 100KiB upper invalid@mid, 100KiB upper valid, 1KiB lower valid, 1KiB mixed valid, 1KiB upper invalid@end, 1KiB upper invalid@mid, 1KiB upper valid, 1MiB lower valid, 1MiB mixed valid, 1MiB upper invalid@end, 1MiB upper invalid@mid, 1MiB upper valid
Estimated total run time: 3 min 30 s
Excluding outliers: false


##### With input 100KiB lower valid #####
Name                ips        average  deviation         median         99th %
Optimised        6.71 K      149.12 μs     ±3.09%      148.00 μs      164.79 μs
Baseline         5.46 K      183.24 μs     ±3.33%      181.21 μs      201.81 μs

Comparison:
Optimised        6.71 K
Baseline         5.46 K - 1.23x slower +34.12 μs

##### With input 100KiB mixed valid #####
Name                ips        average  deviation         median         99th %
Optimised        6.36 K      157.17 μs     ±2.84%      156.00 μs      172.46 μs
Baseline         5.47 K      182.77 μs     ±3.41%      181.21 μs      201.70 μs

Comparison:
Optimised        6.36 K
Baseline         5.47 K - 1.16x slower +25.59 μs

##### With input 100KiB upper invalid@end #####
Name                ips        average  deviation         median         99th %
Optimised        6.70 K      149.29 μs     ±3.22%      148.13 μs      165.07 μs
Baseline         5.48 K      182.60 μs     ±3.25%      181.08 μs      199.29 μs

Comparison:
Optimised        6.70 K
Baseline         5.48 K - 1.22x slower +33.31 μs

##### With input 100KiB upper invalid@mid #####
Name                ips        average  deviation         median         99th %
Optimised       13.57 K       73.69 μs     ±4.19%       73.13 μs       84.21 μs
Baseline        10.90 K       91.71 μs    ±21.33%       90.50 μs      104.46 μs

Comparison:
Optimised       13.57 K
Baseline        10.90 K - 1.24x slower +18.01 μs

##### With input 100KiB upper valid #####
Name                ips        average  deviation         median         99th %
Optimised        6.70 K      149.29 μs     ±3.12%      148.21 μs      164.54 μs
Baseline         5.48 K      182.55 μs     ±3.23%      181.13 μs      198.89 μs

Comparison:
Optimised        6.70 K
Baseline         5.48 K - 1.22x slower +33.26 μs

##### With input 1KiB lower valid #####
Name                ips        average  deviation         median         99th %
Optimised      726.68 K        1.38 μs   ±204.70%        1.33 μs        1.71 μs
Baseline       532.92 K        1.88 μs   ±158.46%        1.83 μs        2.38 μs

Comparison:
Optimised      726.68 K
Baseline       532.92 K - 1.36x slower +0.50 μs

##### With input 1KiB mixed valid #####
Name                ips        average  deviation         median         99th %
Optimised      684.33 K        1.46 μs   ±168.23%        1.46 μs        1.83 μs
Baseline       533.09 K        1.88 μs   ±123.24%        1.83 μs        2.38 μs

Comparison:
Optimised      684.33 K
Baseline       533.09 K - 1.28x slower +0.41 μs

##### With input 1KiB upper invalid@end #####
Name                ips        average  deviation         median         99th %
Optimised      696.36 K        1.44 μs   ±188.41%        1.42 μs        1.79 μs
Baseline       533.96 K        1.87 μs   ±140.11%        1.83 μs        2.33 μs

Comparison:
Optimised      696.36 K
Baseline       533.96 K - 1.30x slower +0.44 μs

##### With input 1KiB upper invalid@mid #####
Name                ips        average  deviation         median         99th %
Optimised        1.47 M      679.06 ns   ±382.29%         667 ns         834 ns
Baseline         1.04 M      965.58 ns   ±295.32%         958 ns        1208 ns

Comparison:
Optimised        1.47 M
Baseline         1.04 M - 1.42x slower +286.53 ns

##### With input 1KiB upper valid #####
Name                ips        average  deviation         median         99th %
Optimised      699.74 K        1.43 μs   ±162.38%        1.42 μs        1.79 μs
Baseline       534.23 K        1.87 μs   ±144.82%        1.83 μs        2.33 μs

Comparison:
Optimised      699.74 K
Baseline       534.23 K - 1.31x slower +0.44 μs

##### With input 1MiB lower valid #####
Name                ips        average  deviation         median         99th %
Optimised        656.14        1.52 ms     ±1.93%        1.52 ms        1.59 ms
Baseline         526.22        1.90 ms     ±2.63%        1.91 ms        1.98 ms

Comparison:
Optimised        656.14
Baseline         526.22 - 1.25x slower +0.38 ms

##### With input 1MiB mixed valid #####
Name                ips        average  deviation         median         99th %
Optimised        625.11        1.60 ms     ±0.96%        1.60 ms        1.66 ms
Baseline         531.48        1.88 ms     ±2.24%        1.87 ms        1.95 ms

Comparison:
Optimised        625.11
Baseline         531.48 - 1.18x slower +0.28 ms

##### With input 1MiB upper invalid@end #####
Name                ips        average  deviation         median         99th %
Optimised        661.36        1.51 ms     ±1.94%        1.51 ms        1.57 ms
Baseline         524.24        1.91 ms     ±2.41%        1.91 ms        1.98 ms

Comparison:
Optimised        661.36
Baseline         524.24 - 1.26x slower +0.40 ms

##### With input 1MiB upper invalid@mid #####
Name                ips        average  deviation         median         99th %
Optimised        1.33 K      753.55 μs     ±1.84%      750.59 μs      793.71 μs
Baseline         1.06 K      945.07 μs    ±11.06%      934.46 μs      992.28 μs

Comparison:
Optimised        1.33 K
Baseline         1.06 K - 1.25x slower +191.51 μs

##### With input 1MiB upper valid #####
Name                ips        average  deviation         median         99th %
Optimised        659.95        1.52 ms     ±1.96%        1.51 ms        1.58 ms
Baseline         524.07        1.91 ms     ±2.13%        1.91 ms        1.96 ms

Comparison:
Optimised        659.95
Baseline         524.07 - 1.26x slower +0.39 ms

========== valid32? benchmark ==========

Operating System: macOS
CPU Information: Apple M2 Pro
Number of Available Cores: 10
Available memory: 32 GB
Elixir 1.19.4
Erlang 28.2
JIT enabled: true

Benchmark suite executing with the following configuration:
warmup: 2 s
time: 5 s
memory time: 0 ns
reduction time: 0 ns
parallel: 1
inputs: 100KiB lower valid, 100KiB mixed valid, 100KiB upper invalid@end, 100KiB upper invalid@mid, 100KiB upper valid, 1KiB lower valid, 1KiB mixed valid, 1KiB upper invalid@end, 1KiB upper invalid@mid, 1KiB upper valid, 1MiB lower valid, 1MiB mixed valid, 1MiB upper invalid@end, 1MiB upper invalid@mid, 1MiB upper valid
Estimated total run time: 3 min 30 s
Excluding outliers: false


##### With input 100KiB lower valid #####
Name                ips        average  deviation         median         99th %
Optimised       11.36 K       88.04 μs     ±5.34%       86.75 μs       99.13 μs
Baseline         8.98 K      111.38 μs     ±7.91%      109.58 μs      123.83 μs

Comparison:
Optimised       11.36 K
Baseline         8.98 K - 1.27x slower +23.33 μs

##### With input 100KiB mixed valid #####
Name                ips        average  deviation         median         99th %
Optimised       10.25 K       97.60 μs     ±4.75%       96.38 μs      109.63 μs
Baseline         8.87 K      112.76 μs     ±6.79%      112.46 μs      123.58 μs

Comparison:
Optimised       10.25 K
Baseline         8.87 K - 1.16x slower +15.16 μs

##### With input 100KiB upper invalid@end #####
Name                ips        average  deviation         median         99th %
Optimised       11.15 K       89.65 μs     ±5.18%       89.71 μs      100.33 μs
Baseline         8.95 K      111.76 μs    ±16.74%      109.50 μs      125.29 μs

Comparison:
Optimised       11.15 K
Baseline         8.95 K - 1.25x slower +22.11 μs

##### With input 100KiB upper invalid@mid #####
Name                ips        average  deviation         median         99th %
Optimised       22.98 K       43.51 μs     ±6.57%       42.88 μs       51.21 μs
Baseline        17.90 K       55.87 μs     ±5.44%       55.71 μs       63.33 μs

Comparison:
Optimised       22.98 K
Baseline        17.90 K - 1.28x slower +12.35 μs

##### With input 100KiB upper valid #####
Name                ips        average  deviation         median         99th %
Optimised       11.10 K       90.07 μs     ±5.10%          90 μs      100.67 μs
Baseline         8.95 K      111.72 μs     ±4.33%      112.33 μs      123.96 μs

Comparison:
Optimised       11.10 K
Baseline         8.95 K - 1.24x slower +21.66 μs

##### With input 1KiB lower valid #####
Name                ips        average  deviation         median         99th %
Optimised        1.30 M        0.77 μs   ±524.78%        0.75 μs        0.96 μs
Baseline         0.82 M        1.22 μs   ±366.04%        1.21 μs        1.50 μs

Comparison:
Optimised        1.30 M
Baseline         0.82 M - 1.59x slower +0.45 μs

##### With input 1KiB mixed valid #####
Name                ips        average  deviation         median         99th %
Optimised        1.13 M        0.88 μs   ±427.60%        0.88 μs        1.08 μs
Baseline         0.82 M        1.22 μs   ±324.00%        1.21 μs        1.50 μs

Comparison:
Optimised        1.13 M
Baseline         0.82 M - 1.38x slower +0.33 μs

##### With input 1KiB upper invalid@end #####
Name                ips        average  deviation         median         99th %
Optimised        1.25 M        0.80 μs   ±524.22%        0.79 μs           1 μs
Baseline         0.82 M        1.22 μs   ±462.92%        1.21 μs        1.50 μs

Comparison:
Optimised        1.25 M
Baseline         0.82 M - 1.53x slower +0.42 μs

##### With input 1KiB upper invalid@mid #####
Name                ips        average  deviation         median         99th %
Optimised        2.53 M      394.70 ns  ±1297.49%         375 ns         500 ns
Baseline         1.57 M      637.25 ns   ±631.30%         625 ns         792 ns

Comparison:
Optimised        2.53 M
Baseline         1.57 M - 1.61x slower +242.56 ns

##### With input 1KiB upper valid #####
Name                ips        average  deviation         median         99th %
Optimised        1.23 M        0.81 μs   ±510.92%        0.79 μs           1 μs
Baseline         0.83 M        1.20 μs   ±367.88%        1.17 μs        1.50 μs

Comparison:
Optimised        1.23 M
Baseline         0.83 M - 1.47x slower +0.39 μs

##### With input 1MiB lower valid #####
Name                ips        average  deviation         median         99th %
Optimised        1.10 K        0.91 ms    ±10.55%        0.91 ms        0.95 ms
Baseline         0.87 K        1.15 ms     ±3.55%        1.16 ms        1.20 ms

Comparison:
Optimised        1.10 K
Baseline         0.87 K - 1.27x slower +0.25 ms

##### With input 1MiB mixed valid #####
Name                ips        average  deviation         median         99th %
Optimised        1.00 K        1.00 ms     ±3.20%        0.99 ms        1.08 ms
Baseline         0.87 K        1.15 ms     ±3.54%        1.16 ms        1.20 ms

Comparison:
Optimised        1.00 K
Baseline         0.87 K - 1.16x slower +0.157 ms

##### With input 1MiB upper invalid@end #####
Name                ips        average  deviation         median         99th %
Optimised        1.11 K        0.90 ms     ±4.59%        0.90 ms        0.95 ms
Baseline         0.87 K        1.16 ms     ±3.52%        1.16 ms        1.20 ms

Comparison:
Optimised        1.11 K
Baseline         0.87 K - 1.29x slower +0.26 ms

##### With input 1MiB upper invalid@mid #####
Name                ips        average  deviation         median         99th %
Optimised        2.22 K      450.23 μs     ±4.17%      450.96 μs      472.67 μs
Baseline         1.73 K      578.06 μs    ±16.57%      576.88 μs      603.76 μs

Comparison:
Optimised        2.22 K
Baseline         1.73 K - 1.28x slower +127.83 μs

##### With input 1MiB upper valid #####
Name                ips        average  deviation         median         99th %
Optimised        1.11 K        0.90 ms     ±2.80%        0.90 ms        0.95 ms
Baseline         0.86 K        1.16 ms     ±3.59%        1.16 ms        1.21 ms

Comparison:
Optimised        1.11 K
Baseline         0.86 K - 1.29x slower +0.26 ms

========== hex_valid32? benchmark ==========

Operating System: macOS
CPU Information: Apple M2 Pro
Number of Available Cores: 10
Available memory: 32 GB
Elixir 1.19.4
Erlang 28.2
JIT enabled: true

Benchmark suite executing with the following configuration:
warmup: 2 s
time: 5 s
memory time: 0 ns
reduction time: 0 ns
parallel: 1
inputs: 100KiB lower valid, 100KiB mixed valid, 100KiB upper invalid@end, 100KiB upper invalid@mid, 100KiB upper valid, 1KiB lower valid, 1KiB mixed valid, 1KiB upper invalid@end, 1KiB upper invalid@mid, 1KiB upper valid, 1MiB lower valid, 1MiB mixed valid, 1MiB upper invalid@end, 1MiB upper invalid@mid, 1MiB upper valid
Estimated total run time: 3 min 30 s
Excluding outliers: false


##### With input 100KiB lower valid #####
Name                ips        average  deviation         median         99th %
Baseline         8.85 K      112.94 μs     ±8.98%      112.58 μs      123.63 μs
Optimised        8.81 K      113.49 μs     ±3.48%      113.21 μs      124.51 μs

Comparison:
Baseline         8.85 K
Optimised        8.81 K - 1.00x slower +0.56 μs

##### With input 100KiB mixed valid #####
Name                ips        average  deviation         median         99th %
Baseline         8.89 K      112.50 μs     ±4.04%      112.50 μs      123.79 μs
Optimised        8.22 K      121.66 μs     ±3.08%      121.25 μs      132.50 μs

Comparison:
Baseline         8.89 K
Optimised        8.22 K - 1.08x slower +9.16 μs

##### With input 100KiB upper invalid@end #####
Name                ips        average  deviation         median         99th %
Optimised        9.47 K      105.63 μs     ±4.05%      105.25 μs      116.67 μs
Baseline         8.73 K      114.60 μs    ±36.79%      113.75 μs      124.96 μs

Comparison:
Optimised        9.47 K
Baseline         8.73 K - 1.08x slower +8.96 μs

##### With input 100KiB upper invalid@mid #####
Name                ips        average  deviation         median         99th %
Optimised       20.06 K       49.86 μs    ±12.29%       49.46 μs       57.13 μs
Baseline        17.77 K       56.26 μs     ±5.46%       56.04 μs       63.50 μs

Comparison:
Optimised       20.06 K
Baseline        17.77 K - 1.13x slower +6.40 μs

##### With input 100KiB upper valid #####
Name                ips        average  deviation         median         99th %
Optimised        9.48 K      105.45 μs     ±4.07%      105.13 μs      116.42 μs
Baseline         8.78 K      113.92 μs     ±3.73%      113.54 μs      124.71 μs

Comparison:
Optimised        9.48 K
Baseline         8.78 K - 1.08x slower +8.46 μs

##### With input 1KiB lower valid #####
Name                ips        average  deviation         median         99th %
Optimised        1.24 M        0.81 μs   ±497.19%        0.79 μs           1 μs
Baseline         0.82 M        1.23 μs   ±332.42%        1.21 μs        1.50 μs

Comparison:
Optimised        1.24 M
Baseline         0.82 M - 1.52x slower +0.42 μs

##### With input 1KiB mixed valid #####
Name                ips        average  deviation         median         99th %
Optimised        1.17 M        0.86 μs   ±450.16%        0.83 μs        1.04 μs
Baseline         0.82 M        1.22 μs   ±327.99%        1.21 μs        1.50 μs

Comparison:
Optimised        1.17 M
Baseline         0.82 M - 1.42x slower +0.36 μs

##### With input 1KiB upper invalid@end #####
Name                ips        average  deviation         median         99th %
Optimised        1.39 M        0.72 μs   ±537.77%        0.71 μs        0.88 μs
Baseline         0.81 M        1.23 μs   ±314.06%        1.21 μs        1.50 μs

Comparison:
Optimised        1.39 M
Baseline         0.81 M - 1.71x slower +0.51 μs

##### With input 1KiB upper invalid@mid #####
Name                ips        average  deviation         median         99th %
Optimised        2.62 M      381.04 ns  ±1060.99%         375 ns         500 ns
Baseline         1.54 M      649.87 ns   ±612.71%         625 ns         792 ns

Comparison:
Optimised        2.62 M
Baseline         1.54 M - 1.71x slower +268.84 ns

##### With input 1KiB upper valid #####
Name                ips        average  deviation         median         99th %
Optimised        1.39 M        0.72 μs   ±579.19%        0.71 μs        0.88 μs
Baseline         0.81 M        1.24 μs   ±326.41%        1.21 μs        1.50 μs

Comparison:
Optimised        1.39 M
Baseline         0.81 M - 1.73x slower +0.52 μs

##### With input 1MiB lower valid #####
Name                ips        average  deviation         median         99th %
Optimised        863.47        1.16 ms     ±3.49%        1.16 ms        1.20 ms
Baseline         844.29        1.18 ms     ±4.12%        1.19 ms        1.23 ms

Comparison:
Optimised        863.47
Baseline         844.29 - 1.02x slower +0.0263 ms

##### With input 1MiB mixed valid #####
Name                ips        average  deviation         median         99th %
Baseline         841.98        1.19 ms     ±4.08%        1.19 ms        1.23 ms
Optimised        801.85        1.25 ms     ±2.95%        1.25 ms        1.28 ms

Comparison:
Baseline         841.98
Optimised        801.85 - 1.05x slower +0.0594 ms

##### With input 1MiB upper invalid@end #####
Name                ips        average  deviation         median         99th %
Optimised        874.27        1.14 ms     ±3.50%        1.14 ms        1.19 ms
Baseline         847.30        1.18 ms     ±3.89%        1.18 ms        1.22 ms

Comparison:
Optimised        874.27
Baseline         847.30 - 1.03x slower +0.0364 ms

##### With input 1MiB upper invalid@mid #####
Name                ips        average  deviation         median         99th %
Optimised        1.75 K      570.58 μs     ±2.36%      569.34 μs      593.97 μs
Baseline         1.67 K      597.71 μs    ±21.07%      595.42 μs      620.06 μs

Comparison:
Optimised        1.75 K
Baseline         1.67 K - 1.05x slower +27.13 μs

##### With input 1MiB upper valid #####
Name                ips        average  deviation         median         99th %
Optimised        872.97        1.15 ms     ±3.53%        1.14 ms        1.19 ms
Baseline         852.12        1.17 ms     ±3.97%        1.17 ms        1.23 ms

Comparison:
Optimised        872.97
Baseline         852.12 - 1.02x slower +0.0280 ms

========== valid64? benchmark ==========

Operating System: macOS
CPU Information: Apple M2 Pro
Number of Available Cores: 10
Available memory: 32 GB
Elixir 1.19.4
Erlang 28.2
JIT enabled: true

Benchmark suite executing with the following configuration:
warmup: 2 s
time: 5 s
memory time: 0 ns
reduction time: 0 ns
parallel: 1
inputs: 100KiB invalid@end, 100KiB invalid@mid, 100KiB valid, 1KiB invalid@end, 1KiB invalid@mid, 1KiB valid, 1MiB invalid@end, 1MiB invalid@mid, 1MiB valid
Estimated total run time: 2 min 6 s
Excluding outliers: false


##### With input 100KiB invalid@end #####
Name                ips        average  deviation         median         99th %
Baseline        10.64 K       93.97 μs     ±7.45%       93.50 μs      104.21 μs
Optimised        7.33 K      136.38 μs     ±2.00%      136.13 μs      147.83 μs

Comparison:
Baseline        10.64 K
Optimised        7.33 K - 1.45x slower +42.41 μs

##### With input 100KiB invalid@mid #####
Name                ips        average  deviation         median         99th %
Baseline        21.44 K       46.64 μs     ±6.61%       46.38 μs       53.75 μs
Optimised       16.19 K       61.77 μs     ±3.97%       61.54 μs       69.33 μs

Comparison:
Baseline        21.44 K
Optimised       16.19 K - 1.32x slower +15.13 μs

##### With input 100KiB valid #####
Name                ips        average  deviation         median         99th %
Baseline        10.65 K       93.91 μs     ±2.19%       93.46 μs      104.13 μs
Optimised        7.33 K      136.35 μs     ±2.05%      136.21 μs      147.88 μs

Comparison:
Baseline        10.65 K
Optimised        7.33 K - 1.45x slower +42.44 μs

##### With input 1KiB invalid@end #####
Name                ips        average  deviation         median         99th %
Optimised        1.26 M        0.79 μs   ±417.41%        0.79 μs        0.96 μs
Baseline         0.97 M        1.03 μs   ±323.22%           1 μs        1.25 μs

Comparison:
Optimised        1.26 M
Baseline         0.97 M - 1.30x slower +0.24 μs

##### With input 1KiB invalid@mid #####
Name                ips        average  deviation         median         99th %
Optimised        2.35 M      425.61 ns   ±475.84%         417 ns         542 ns
Baseline         1.80 M      554.35 ns   ±329.90%         542 ns         667 ns

Comparison:
Optimised        2.35 M
Baseline         1.80 M - 1.30x slower +128.74 ns

##### With input 1KiB valid #####
Name                ips        average  deviation         median         99th %
Optimised        1.26 M        0.79 μs   ±419.96%        0.79 μs        0.96 μs
Baseline         0.97 M        1.03 μs   ±305.07%           1 μs        1.25 μs

Comparison:
Optimised        1.26 M
Baseline         0.97 M - 1.30x slower +0.24 μs

##### With input 1MiB invalid@end #####
Name                ips        average  deviation         median         99th %
Baseline         1.04 K        0.97 ms     ±2.83%        0.97 ms        1.00 ms
Optimised        0.67 K        1.48 ms     ±1.25%        1.48 ms        1.52 ms

Comparison:
Baseline         1.04 K
Optimised        0.67 K - 1.54x slower +0.52 ms

##### With input 1MiB invalid@mid #####
Name                ips        average  deviation         median         99th %
Baseline         2.07 K      483.42 μs     ±3.52%      482.54 μs      503.05 μs
Optimised        1.35 K      739.82 μs     ±1.84%      740.17 μs      770.14 μs

Comparison:
Baseline         2.07 K
Optimised        1.35 K - 1.53x slower +256.40 μs

##### With input 1MiB valid #####
Name                ips        average  deviation         median         99th %
Baseline         1.03 K        0.97 ms     ±2.57%        0.97 ms        0.99 ms
Optimised        0.67 K        1.49 ms     ±1.20%        1.49 ms        1.52 ms

Comparison:
Baseline         1.03 K
Optimised        0.67 K - 1.54x slower +0.52 ms

========== url_valid64? benchmark ==========

Operating System: macOS
CPU Information: Apple M2 Pro
Number of Available Cores: 10
Available memory: 32 GB
Elixir 1.19.4
Erlang 28.2
JIT enabled: true

Benchmark suite executing with the following configuration:
warmup: 2 s
time: 5 s
memory time: 0 ns
reduction time: 0 ns
parallel: 1
inputs: 100KiB invalid@end, 100KiB invalid@mid, 100KiB valid, 1KiB invalid@end, 1KiB invalid@mid, 1KiB valid, 1MiB invalid@end, 1MiB invalid@mid, 1MiB valid
Estimated total run time: 2 min 6 s
Excluding outliers: false


##### With input 100KiB invalid@end #####
Name                ips        average  deviation         median         99th %
Baseline        10.62 K       94.17 μs    ±33.14%       93.46 μs      104.67 μs
Optimised        7.18 K      139.30 μs     ±2.41%      139.08 μs      151.36 μs

Comparison:
Baseline        10.62 K
Optimised        7.18 K - 1.48x slower +45.13 μs

##### With input 100KiB invalid@mid #####
Name                ips        average  deviation         median         99th %
Baseline        21.41 K       46.70 μs     ±7.79%       46.42 μs       52.83 μs
Optimised       15.82 K       63.19 μs    ±49.69%       62.63 μs       70.96 μs

Comparison:
Baseline        21.41 K
Optimised       15.82 K - 1.35x slower +16.49 μs

##### With input 100KiB valid #####
Name                ips        average  deviation         median         99th %
Baseline        10.66 K       93.85 μs     ±2.67%       93.50 μs      104.21 μs
Optimised        7.18 K      139.26 μs     ±2.03%      139.04 μs      150.92 μs

Comparison:
Baseline        10.66 K
Optimised        7.18 K - 1.48x slower +45.41 μs

##### With input 1KiB invalid@end #####
Name                ips        average  deviation         median         99th %
Optimised        1.17 M        0.86 μs   ±418.44%        0.83 μs        1.04 μs
Baseline         0.96 M        1.04 μs   ±308.25%           1 μs        1.25 μs

Comparison:
Optimised        1.17 M
Baseline         0.96 M - 1.21x slower +0.182 μs

##### With input 1KiB invalid@mid #####
Name                ips        average  deviation         median         99th %
Optimised        2.18 M      459.32 ns   ±431.07%         458 ns         583 ns
Baseline         1.80 M      557.08 ns   ±470.89%         542 ns         667 ns

Comparison:
Optimised        2.18 M
Baseline         1.80 M - 1.21x slower +97.76 ns

##### With input 1KiB valid #####
Name                ips        average  deviation         median         99th %
Optimised        1.17 M        0.85 μs   ±378.54%        0.83 μs        1.04 μs
Baseline         0.97 M        1.03 μs   ±296.26%           1 μs        1.25 μs

Comparison:
Optimised        1.17 M
Baseline         0.97 M - 1.21x slower +0.182 μs

##### With input 1MiB invalid@end #####
Name                ips        average  deviation         median         99th %
Baseline         1.04 K        0.97 ms     ±2.21%        0.96 ms        1.00 ms
Optimised        0.66 K        1.52 ms     ±1.43%        1.53 ms        1.57 ms

Comparison:
Baseline         1.04 K
Optimised        0.66 K - 1.58x slower +0.56 ms

##### With input 1MiB invalid@mid #####
Name                ips        average  deviation         median         99th %
Baseline         2.07 K      482.46 μs     ±3.21%      481.67 μs      506.46 μs
Optimised        1.31 K      763.34 μs     ±1.45%      761.04 μs      793.02 μs

Comparison:
Baseline         2.07 K
Optimised        1.31 K - 1.58x slower +280.87 μs

##### With input 1MiB valid #####
Name                ips        average  deviation         median         99th %
Baseline         1.04 K        0.97 ms     ±2.09%        0.96 ms        1.00 ms
Optimised        0.66 K        1.52 ms     ±1.41%        1.53 ms        1.56 ms

Comparison:
Baseline         1.04 K
Optimised        0.66 K - 1.58x slower +0.56 ms

Comment thread lib/elixir/lib/base.ex Outdated
@josevalim
Copy link
Copy Markdown
Member

> Weirdly, I could not find any improvements in hex_valid32? although it has the same number of range checks as valid32?.

The results for hex_valid32? look good, no?

##### With input 1KiB lower valid #####
Name                ips        average  deviation         median         99th %
Optimised        1.24 M        0.81 μs   ±497.19%        0.79 μs           1 μs
Baseline         0.82 M        1.23 μs   ±332.42%        1.21 μs        1.50 μs

Comparison:
Optimised        1.24 M
Baseline         0.82 M - 1.52x slower +0.42 μs

##### With input 1KiB mixed valid #####
Name                ips        average  deviation         median         99th %
Optimised        1.17 M        0.86 μs   ±450.16%        0.83 μs        1.04 μs
Baseline         0.82 M        1.22 μs   ±327.99%        1.21 μs        1.50 μs

Comparison:
Optimised        1.17 M
Baseline         0.82 M - 1.42x slower +0.36 μs

##### With input 1KiB upper invalid@end #####
Name                ips        average  deviation         median         99th %
Optimised        1.39 M        0.72 μs   ±537.77%        0.71 μs        0.88 μs
Baseline         0.81 M        1.23 μs   ±314.06%        1.21 μs        1.50 μs

Comparison:
Optimised        1.39 M
Baseline         0.81 M - 1.71x slower +0.51 μs

Even if the results are not so great, I'd apply it to the hex ones for consistency and to reduce code branches!

@PJUllrich
Copy link
Copy Markdown
Contributor Author

@josevalim you, sir, are a genius! I removed the guard check on the 8th character and now run the 7 bytes through SWAR. That improved performance from e.g. 1.25x to 2.5x!

New Results
# valid16? equivalence check passed.

# valid32? equivalence check passed.

# hex_valid32? equivalence check passed.

# inputs64base 1MiB valid (random, ~3.1% +/): 43624 +/ in 1398104 bytes (3.12%)
# valid64? / url_valid64? equivalence check passed.


========== valid16? benchmark ==========

Operating System: macOS
CPU Information: Apple M2 Pro
Number of Available Cores: 10
Available memory: 32 GB
Elixir 1.19.4
Erlang 28.2
JIT enabled: true

Benchmark suite executing with the following configuration:
warmup: 2 s
time: 5 s
memory time: 0 ns
reduction time: 0 ns
parallel: 1
inputs: 100KiB lower valid, 100KiB mixed valid, 100KiB upper invalid@end, 100KiB upper invalid@mid, 100KiB upper valid, 1KiB lower valid, 1KiB mixed valid, 1KiB upper invalid@end, 1KiB upper invalid@mid, 1KiB upper valid, 1MiB lower valid, 1MiB mixed valid, 1MiB upper invalid@end, 1MiB upper invalid@mid, 1MiB upper valid
Estimated total run time: 3 min 30 s
Excluding outliers: false


##### With input 100KiB lower valid #####
Name                ips        average  deviation         median         99th %
Optimised       13.28 K       75.31 μs     ±9.41%       74.88 μs       85.75 μs
Baseline         5.38 K      185.77 μs     ±5.76%      181.38 μs      217.88 μs

Comparison:
Optimised       13.28 K
Baseline         5.38 K - 2.47x slower +110.46 μs

##### With input 100KiB mixed valid #####
Name                ips        average  deviation         median         99th %
Optimised       11.42 K       87.56 μs     ±7.28%       87.04 μs       98.50 μs
Baseline         5.39 K      185.65 μs     ±3.53%      186.17 μs      204.42 μs

Comparison:
Optimised       11.42 K
Baseline         5.39 K - 2.12x slower +98.08 μs

##### With input 100KiB upper invalid@end #####
Name                ips        average  deviation         median         99th %
Optimised       13.32 K       75.07 μs     ±9.38%       74.75 μs       85.25 μs
Baseline         5.42 K      184.66 μs     ±3.80%      181.17 μs      208.63 μs

Comparison:
Optimised       13.32 K
Baseline         5.42 K - 2.46x slower +109.58 μs

##### With input 100KiB upper invalid@mid #####
Name                ips        average  deviation         median         99th %
Optimised       26.79 K       37.32 μs     ±7.74%       37.08 μs       44.13 μs
Baseline        10.75 K       93.04 μs     ±3.89%       93.04 μs      104.57 μs

Comparison:
Optimised       26.79 K
Baseline        10.75 K - 2.49x slower +55.72 μs

##### With input 100KiB upper valid #####
Name                ips        average  deviation         median         99th %
Optimised       13.25 K       75.47 μs    ±17.37%       74.67 μs       87.76 μs
Baseline         5.43 K      184.18 μs     ±3.13%      181.33 μs      203.75 μs

Comparison:
Optimised       13.25 K
Baseline         5.43 K - 2.44x slower +108.71 μs

##### With input 1KiB lower valid #####
Name                ips        average  deviation         median         99th %
Optimised        1.23 M        0.81 μs   ±288.10%        0.79 μs           1 μs
Baseline         0.52 M        1.91 μs   ±154.18%        1.88 μs        2.38 μs

Comparison:
Optimised        1.23 M
Baseline         0.52 M - 2.36x slower +1.10 μs

##### With input 1KiB mixed valid #####
Name                ips        average  deviation         median         99th %
Optimised        1.07 M        0.94 μs   ±266.25%        0.92 μs        1.13 μs
Baseline         0.52 M        1.93 μs   ±254.09%        1.88 μs        2.42 μs

Comparison:
Optimised        1.07 M
Baseline         0.52 M - 2.06x slower +1.00 μs

##### With input 1KiB upper invalid@end #####
Name                ips        average  deviation         median         99th %
Optimised        1.24 M        0.81 μs   ±290.03%        0.79 μs           1 μs
Baseline         0.53 M        1.89 μs   ±145.47%        1.88 μs        2.38 μs

Comparison:
Optimised        1.24 M
Baseline         0.53 M - 2.34x slower +1.08 μs

##### With input 1KiB upper invalid@mid #####
Name                ips        average  deviation         median         99th %
Optimised        2.35 M      424.77 ns   ±529.79%         417 ns         542 ns
Baseline         1.02 M      976.63 ns   ±265.49%         958 ns        1209 ns

Comparison:
Optimised        2.35 M
Baseline         1.02 M - 2.30x slower +551.86 ns

##### With input 1KiB upper valid #####
Name                ips        average  deviation         median         99th %
Optimised        1.24 M        0.80 μs   ±316.59%        0.79 μs        0.96 μs
Baseline         0.52 M        1.91 μs   ±150.20%        1.88 μs        2.38 μs

Comparison:
Optimised        1.24 M
Baseline         0.52 M - 2.38x slower +1.11 μs

##### With input 1MiB lower valid #####
Name                ips        average  deviation         median         99th %
Optimised        1.30 K        0.77 ms     ±8.39%        0.77 ms        0.81 ms
Baseline         0.52 K        1.91 ms     ±2.89%        1.91 ms        1.99 ms

Comparison:
Optimised        1.30 K
Baseline         0.52 K - 2.48x slower +1.14 ms

##### With input 1MiB mixed valid #####
Name                ips        average  deviation         median         99th %
Optimised        1.11 K        0.90 ms     ±5.53%        0.89 ms        0.96 ms
Baseline         0.52 K        1.92 ms     ±2.75%        1.92 ms        1.98 ms

Comparison:
Optimised        1.11 K
Baseline         0.52 K - 2.13x slower +1.02 ms

##### With input 1MiB upper invalid@end #####
Name                ips        average  deviation         median         99th %
Optimised        1.30 K        0.77 ms     ±7.47%        0.77 ms        0.81 ms
Baseline         0.52 K        1.92 ms     ±2.75%        1.92 ms        1.98 ms

Comparison:
Optimised        1.30 K
Baseline         0.52 K - 2.49x slower +1.15 ms

##### With input 1MiB upper invalid@mid #####
Name                ips        average  deviation         median         99th %
Optimised        2.60 K      384.85 μs     ±7.57%      383.13 μs      408.61 μs
Baseline         1.05 K      955.99 μs     ±3.25%      954.33 μs     1000.91 μs

Comparison:
Optimised        2.60 K
Baseline         1.05 K - 2.48x slower +571.14 μs

##### With input 1MiB upper valid #####
Name                ips        average  deviation         median         99th %
Optimised        1.30 K        0.77 ms     ±8.39%        0.77 ms        0.81 ms
Baseline         0.52 K        1.92 ms     ±2.38%        1.92 ms        2.01 ms

Comparison:
Optimised        1.30 K
Baseline         0.52 K - 2.49x slower +1.15 ms

========== valid32? benchmark ==========

Operating System: macOS
CPU Information: Apple M2 Pro
Number of Available Cores: 10
Available memory: 32 GB
Elixir 1.19.4
Erlang 28.2
JIT enabled: true

Benchmark suite executing with the following configuration:
warmup: 2 s
time: 5 s
memory time: 0 ns
reduction time: 0 ns
parallel: 1
inputs: 100KiB lower valid, 100KiB mixed valid, 100KiB upper invalid@end, 100KiB upper invalid@mid, 100KiB upper valid, 1KiB lower valid, 1KiB mixed valid, 1KiB upper invalid@end, 1KiB upper invalid@mid, 1KiB upper valid, 1MiB lower valid, 1MiB mixed valid, 1MiB upper invalid@end, 1MiB upper invalid@mid, 1MiB upper valid
Estimated total run time: 3 min 30 s
Excluding outliers: false


##### With input 100KiB lower valid #####
Name                ips        average  deviation         median         99th %
Optimised       16.59 K       60.27 μs     ±4.44%       59.92 μs       68.42 μs
Baseline         8.84 K      113.16 μs     ±7.42%      112.50 μs      126.45 μs

Comparison:
Optimised       16.59 K
Baseline         8.84 K - 1.88x slower +52.88 μs

##### With input 100KiB mixed valid #####
Name                ips        average  deviation         median         99th %
Optimised       14.25 K       70.19 μs     ±4.15%       69.79 μs       79.29 μs
Baseline         8.84 K      113.12 μs     ±3.93%      112.54 μs      125.71 μs

Comparison:
Optimised       14.25 K
Baseline         8.84 K - 1.61x slower +42.93 μs

##### With input 100KiB upper invalid@end #####
Name                ips        average  deviation         median         99th %
Optimised       16.58 K       60.30 μs     ±4.84%       59.88 μs       69.79 μs
Baseline         8.84 K      113.14 μs     ±2.84%      112.54 μs      125.96 μs

Comparison:
Optimised       16.58 K
Baseline         8.84 K - 1.88x slower +52.84 μs

##### With input 100KiB upper invalid@mid #####
Name                ips        average  deviation         median         99th %
Optimised       33.43 K       29.92 μs    ±11.16%       29.71 μs       35.92 μs
Baseline        17.89 K       55.89 μs     ±4.27%       55.63 μs       64.08 μs

Comparison:
Optimised       33.43 K
Baseline        17.89 K - 1.87x slower +25.97 μs

##### With input 100KiB upper valid #####
Name                ips        average  deviation         median         99th %
Optimised       16.49 K       60.66 μs    ±26.19%       59.88 μs       70.58 μs
Baseline         8.80 K      113.64 μs    ±21.35%      112.46 μs      127.24 μs

Comparison:
Optimised       16.49 K
Baseline         8.80 K - 1.87x slower +52.97 μs

##### With input 1KiB lower valid #####
Name                ips        average  deviation         median         99th %
Optimised        1.41 M        0.71 μs   ±530.24%        0.71 μs        0.88 μs
Baseline         0.82 M        1.23 μs   ±315.47%        1.21 μs        1.50 μs

Comparison:
Optimised        1.41 M
Baseline         0.82 M - 1.72x slower +0.51 μs

##### With input 1KiB mixed valid #####
Name                ips        average  deviation         median         99th %
Optimised        1.24 M        0.81 μs   ±491.53%        0.79 μs           1 μs
Baseline         0.82 M        1.22 μs   ±342.29%        1.21 μs        1.50 μs

Comparison:
Optimised        1.24 M
Baseline         0.82 M - 1.52x slower +0.42 μs

##### With input 1KiB upper invalid@end #####
Name                ips        average  deviation         median         99th %
Optimised        1.41 M        0.71 μs   ±505.20%        0.67 μs        0.88 μs
Baseline         0.82 M        1.22 μs   ±310.68%        1.21 μs        1.50 μs

Comparison:
Optimised        1.41 M
Baseline         0.82 M - 1.73x slower +0.52 μs

##### With input 1KiB upper invalid@mid #####
Name                ips        average  deviation         median         99th %
Optimised        2.55 M      391.43 ns  ±1001.20%         375 ns         500 ns
Baseline         1.54 M      650.52 ns   ±631.57%         625 ns         792 ns

Comparison:
Optimised        2.55 M
Baseline         1.54 M - 1.66x slower +259.09 ns

##### With input 1KiB upper valid #####
Name                ips        average  deviation         median         99th %
Optimised        1.42 M        0.70 μs   ±537.09%        0.67 μs        0.88 μs
Baseline         0.82 M        1.22 μs   ±324.03%        1.21 μs        1.50 μs

Comparison:
Optimised        1.42 M
Baseline         0.82 M - 1.74x slower +0.52 μs

##### With input 1MiB lower valid #####
Name                ips        average  deviation         median         99th %
Optimised        1.63 K        0.61 ms     ±4.21%        0.61 ms        0.64 ms
Baseline         0.86 K        1.17 ms    ±11.69%        1.16 ms        1.23 ms

Comparison:
Optimised        1.63 K
Baseline         0.86 K - 1.90x slower +0.55 ms

##### With input 1MiB mixed valid #####
Name                ips        average  deviation         median         99th %
Optimised        1.39 K        0.72 ms     ±1.32%        0.71 ms        0.75 ms
Baseline         0.86 K        1.16 ms     ±2.29%        1.16 ms        1.21 ms

Comparison:
Optimised        1.39 K
Baseline         0.86 K - 1.62x slower +0.45 ms

##### With input 1MiB upper invalid@end #####
Name                ips        average  deviation         median         99th %
Optimised        1.63 K        0.61 ms     ±4.13%        0.61 ms        0.65 ms
Baseline         0.86 K        1.16 ms     ±3.04%        1.16 ms        1.22 ms

Comparison:
Optimised        1.63 K
Baseline         0.86 K - 1.89x slower +0.55 ms

##### With input 1MiB upper invalid@mid #####
Name                ips        average  deviation         median         99th %
Optimised        3.25 K      307.28 μs     ±4.37%      306.17 μs      329.08 μs
Baseline         1.73 K      579.54 μs     ±1.63%      578.04 μs      609.63 μs

Comparison:
Optimised        3.25 K
Baseline         1.73 K - 1.89x slower +272.25 μs

##### With input 1MiB upper valid #####
Name                ips        average  deviation         median         99th %
Optimised        1.63 K        0.61 ms     ±4.29%        0.61 ms        0.64 ms
Baseline         0.86 K        1.17 ms    ±10.08%        1.16 ms        1.22 ms

Comparison:
Optimised        1.63 K
Baseline         0.86 K - 1.90x slower +0.55 ms

========== hex_valid32? benchmark ==========

Operating System: macOS
CPU Information: Apple M2 Pro
Number of Available Cores: 10
Available memory: 32 GB
Elixir 1.19.4
Erlang 28.2
JIT enabled: true

Benchmark suite executing with the following configuration:
warmup: 2 s
time: 5 s
memory time: 0 ns
reduction time: 0 ns
parallel: 1
inputs: 100KiB lower valid, 100KiB mixed valid, 100KiB upper invalid@end, 100KiB upper invalid@mid, 100KiB upper valid, 1KiB lower valid, 1KiB mixed valid, 1KiB upper invalid@end, 1KiB upper invalid@mid, 1KiB upper valid, 1MiB lower valid, 1MiB mixed valid, 1MiB upper invalid@end, 1MiB upper invalid@mid, 1MiB upper valid
Estimated total run time: 3 min 30 s
Excluding outliers: false


##### With input 100KiB lower valid #####
Name                ips        average  deviation         median         99th %
Optimised       16.58 K       60.32 μs    ±11.63%       59.71 μs       70.25 μs
Baseline         8.83 K      113.24 μs     ±7.37%      112.50 μs      126.79 μs

Comparison:
Optimised       16.58 K
Baseline         8.83 K - 1.88x slower +52.93 μs

##### With input 100KiB mixed valid #####
Name                ips        average  deviation         median         99th %
Optimised       14.18 K       70.51 μs    ±18.24%       69.79 μs       80.54 μs
Baseline         8.82 K      113.36 μs     ±4.52%      112.54 μs      128.03 μs

Comparison:
Optimised       14.18 K
Baseline         8.82 K - 1.61x slower +42.86 μs

##### With input 100KiB upper invalid@end #####
Name                ips        average  deviation         median         99th %
Optimised       16.61 K       60.21 μs     ±4.69%       59.83 μs       68.29 μs
Baseline         8.84 K      113.15 μs     ±2.88%      112.50 μs      126.29 μs

Comparison:
Optimised       16.61 K
Baseline         8.84 K - 1.88x slower +52.94 μs

##### With input 100KiB upper invalid@mid #####
Name                ips        average  deviation         median         99th %
Optimised       33.51 K       29.84 μs    ±10.66%       29.63 μs       35.96 μs
Baseline        17.82 K       56.11 μs     ±4.77%       55.71 μs       65.83 μs

Comparison:
Optimised       33.51 K
Baseline        17.82 K - 1.88x slower +26.26 μs

##### With input 100KiB upper valid #####
Name                ips        average  deviation         median         99th %
Optimised       16.58 K       60.32 μs     ±4.52%       59.92 μs       70.21 μs
Baseline         8.79 K      113.78 μs    ±14.85%      112.54 μs      127.08 μs

Comparison:
Optimised       16.58 K
Baseline         8.79 K - 1.89x slower +53.46 μs

##### With input 1KiB lower valid #####
Name                ips        average  deviation         median         99th %
Optimised        1.42 M        0.71 μs   ±592.70%        0.67 μs        0.88 μs
Baseline         0.81 M        1.23 μs   ±315.14%        1.21 μs        1.50 μs

Comparison:
Optimised        1.42 M
Baseline         0.81 M - 1.74x slower +0.52 μs

##### With input 1KiB mixed valid #####
Name                ips        average  deviation         median         99th %
Optimised        1.24 M        0.80 μs   ±476.93%        0.79 μs        0.96 μs
Baseline         0.82 M        1.23 μs   ±278.28%        1.21 μs        1.50 μs

Comparison:
Optimised        1.24 M
Baseline         0.82 M - 1.52x slower +0.42 μs

##### With input 1KiB upper invalid@end #####
Name                ips        average  deviation         median         99th %
Optimised        1.41 M        0.71 μs   ±524.09%        0.71 μs        0.88 μs
Baseline         0.82 M        1.23 μs   ±326.13%        1.21 μs        1.50 μs

Comparison:
Optimised        1.41 M
Baseline         0.82 M - 1.73x slower +0.52 μs

##### With input 1KiB upper invalid@mid #####
Name                ips        average  deviation         median         99th %
Optimised        2.54 M      394.13 ns  ±1090.65%         375 ns         500 ns
Baseline         1.54 M      650.38 ns   ±615.74%         625 ns         792 ns

Comparison:
Optimised        2.54 M
Baseline         1.54 M - 1.65x slower +256.25 ns

##### With input 1KiB upper valid #####
Name                ips        average  deviation         median         99th %
Optimised        1.41 M        0.71 μs   ±577.89%        0.71 μs        0.88 μs
Baseline         0.81 M        1.23 μs   ±303.27%        1.21 μs        1.50 μs

Comparison:
Optimised        1.41 M
Baseline         0.81 M - 1.74x slower +0.52 μs

##### With input 1MiB lower valid #####
Name                ips        average  deviation         median         99th %
Optimised        1.62 K        0.62 ms     ±4.20%        0.61 ms        0.65 ms
Baseline         0.86 K        1.16 ms     ±2.48%        1.16 ms        1.22 ms

Comparison:
Optimised        1.62 K
Baseline         0.86 K - 1.89x slower +0.54 ms

##### With input 1MiB mixed valid #####
Name                ips        average  deviation         median         99th %
Optimised        1.39 K        0.72 ms    ±11.46%        0.71 ms        0.76 ms
Baseline         0.86 K        1.16 ms     ±2.32%        1.16 ms        1.21 ms

Comparison:
Optimised        1.39 K
Baseline         0.86 K - 1.61x slower +0.44 ms

##### With input 1MiB upper invalid@end #####
Name                ips        average  deviation         median         99th %
Optimised        1.62 K        0.62 ms     ±5.61%        0.61 ms        0.65 ms
Baseline         0.86 K        1.16 ms     ±1.30%        1.16 ms        1.21 ms

Comparison:
Optimised        1.62 K
Baseline         0.86 K - 1.89x slower +0.55 ms

##### With input 1MiB upper invalid@mid #####
Name                ips        average  deviation         median         99th %
Optimised        3.24 K      308.56 μs     ±6.15%      307.25 μs      330.09 μs
Baseline         1.72 K      581.21 μs     ±2.91%      578.29 μs      612.13 μs

Comparison:
Optimised        3.24 K
Baseline         1.72 K - 1.88x slower +272.65 μs

##### With input 1MiB upper valid #####
Name                ips        average  deviation         median         99th %
Optimised        1.62 K        0.62 ms     ±4.21%        0.61 ms        0.65 ms
Baseline         0.86 K        1.16 ms     ±1.41%        1.16 ms        1.22 ms

Comparison:
Optimised        1.62 K
Baseline         0.86 K - 1.89x slower +0.55 ms

========== valid64? benchmark ==========

Operating System: macOS
CPU Information: Apple M2 Pro
Number of Available Cores: 10
Available memory: 32 GB
Elixir 1.19.4
Erlang 28.2
JIT enabled: true

Benchmark suite executing with the following configuration:
warmup: 2 s
time: 5 s
memory time: 0 ns
reduction time: 0 ns
parallel: 1
inputs: 100KiB alnum valid (0% +/), 100KiB invalid@end, 100KiB invalid@mid, 100KiB valid (random, ~3.1% +/), 1KiB alnum valid (0% +/), 1KiB invalid@end, 1KiB invalid@mid, 1KiB valid (random, ~3.1% +/), 1MiB alnum valid (0% +/), 1MiB invalid@end, 1MiB invalid@mid, 1MiB valid (random, ~3.1% +/)
Estimated total run time: 2 min 48 s
Excluding outliers: false


##### With input 100KiB alnum valid (0% +/) #####
Name                ips        average  deviation         median         99th %
Optimised       15.55 K       64.32 μs     ±5.60%       63.88 μs       73.33 μs
Baseline        10.63 K       94.08 μs     ±5.59%       93.46 μs      106.04 μs

Comparison:
Optimised       15.55 K
Baseline        10.63 K - 1.46x slower +29.76 μs

##### With input 100KiB invalid@end #####
Name                ips        average  deviation         median         99th %
Optimised       15.54 K       64.33 μs     ±5.01%       63.92 μs       72.75 μs
Baseline        10.63 K       94.11 μs     ±3.47%       93.46 μs      106.67 μs

Comparison:
Optimised       15.54 K
Baseline        10.63 K - 1.46x slower +29.78 μs

##### With input 100KiB invalid@mid #####
Name                ips        average  deviation         median         99th %
Optimised       31.17 K       32.08 μs     ±8.04%       31.83 μs       38.46 μs
Baseline        21.30 K       46.95 μs    ±39.90%       46.42 μs       54.83 μs

Comparison:
Optimised       31.17 K
Baseline        21.30 K - 1.46x slower +14.87 μs

##### With input 100KiB valid (random, ~3.1% +/) #####
Name                ips        average  deviation         median         99th %
Optimised       15.54 K       64.35 μs     ±5.46%       63.92 μs       72.92 μs
Baseline        10.65 K       93.94 μs     ±2.75%       93.38 μs      105.13 μs

Comparison:
Optimised       15.54 K
Baseline        10.65 K - 1.46x slower +29.59 μs

##### With input 1KiB alnum valid (0% +/) #####
Name                ips        average  deviation         median         99th %
Optimised        1.33 M        0.75 μs   ±242.48%        0.75 μs        0.92 μs
Baseline         0.96 M        1.04 μs   ±313.90%           1 μs        1.25 μs

Comparison:
Optimised        1.33 M
Baseline         0.96 M - 1.38x slower +0.29 μs

##### With input 1KiB invalid@end #####
Name                ips        average  deviation         median         99th %
Optimised        1.34 M        0.75 μs   ±264.24%        0.71 μs        0.92 μs
Baseline         0.97 M        1.03 μs   ±306.17%           1 μs        1.25 μs

Comparison:
Optimised        1.34 M
Baseline         0.97 M - 1.39x slower +0.29 μs

##### With input 1KiB invalid@mid #####
Name                ips        average  deviation         median         99th %
Optimised        2.43 M      411.11 ns   ±518.96%         416 ns         500 ns
Baseline         1.80 M      556.38 ns   ±327.69%         542 ns         667 ns

Comparison:
Optimised        2.43 M
Baseline         1.80 M - 1.35x slower +145.26 ns

##### With input 1KiB valid (random, ~3.1% +/) #####
Name                ips        average  deviation         median         99th %
Optimised        1.34 M        0.74 μs   ±308.43%        0.71 μs        0.92 μs
Baseline         0.97 M        1.03 μs   ±278.74%           1 μs        1.25 μs

Comparison:
Optimised        1.34 M
Baseline         0.97 M - 1.39x slower +0.29 μs

##### With input 1MiB alnum valid (0% +/) #####
Name                ips        average  deviation         median         99th %
Optimised        1.52 K      659.92 μs     ±6.55%      655.92 μs      701.55 μs
Baseline         1.03 K      970.00 μs     ±4.21%      966.17 μs     1021.03 μs

Comparison:
Optimised        1.52 K
Baseline         1.03 K - 1.47x slower +310.08 μs

##### With input 1MiB invalid@end #####
Name                ips        average  deviation         median         99th %
Optimised        1.51 K      661.98 μs    ±11.03%      655.96 μs      698.59 μs
Baseline         1.03 K      968.15 μs     ±4.12%      965.21 μs     1013.63 μs

Comparison:
Optimised        1.51 K
Baseline         1.03 K - 1.46x slower +306.17 μs

##### With input 1MiB invalid@mid #####
Name                ips        average  deviation         median         99th %
Optimised        3.03 K      329.88 μs     ±6.05%      327.71 μs      353.48 μs
Baseline         2.07 K      483.91 μs     ±3.36%      481.42 μs      513.39 μs

Comparison:
Optimised        3.03 K
Baseline         2.07 K - 1.47x slower +154.03 μs

##### With input 1MiB valid (random, ~3.1% +/) #####
Name                ips        average  deviation         median         99th %
Optimised        1.52 K      659.62 μs     ±6.69%      656.17 μs      692.43 μs
Baseline         1.03 K      968.72 μs     ±4.17%      966.13 μs     1018.10 μs

Comparison:
Optimised        1.52 K
Baseline         1.03 K - 1.47x slower +309.10 μs

========== url_valid64? benchmark ==========

Operating System: macOS
CPU Information: Apple M2 Pro
Number of Available Cores: 10
Available memory: 32 GB
Elixir 1.19.4
Erlang 28.2
JIT enabled: true

Benchmark suite executing with the following configuration:
warmup: 2 s
time: 5 s
memory time: 0 ns
reduction time: 0 ns
parallel: 1
inputs: 100KiB alnum valid (0% -_), 100KiB invalid@end, 100KiB invalid@mid, 100KiB valid (random, ~3.1% -_), 1KiB alnum valid (0% -_), 1KiB invalid@end, 1KiB invalid@mid, 1KiB valid (random, ~3.1% -_), 1MiB alnum valid (0% -_), 1MiB invalid@end, 1MiB invalid@mid, 1MiB valid (random, ~3.1% -_)
Estimated total run time: 2 min 48 s
Excluding outliers: false


##### With input 100KiB alnum valid (0% -_) #####
Name                ips        average  deviation         median         99th %
Optimised       14.29 K       70.00 μs     ±4.72%       69.63 μs       79.54 μs
Baseline        10.62 K       94.14 μs     ±6.21%       93.46 μs      106.00 μs

Comparison:
Optimised       14.29 K
Baseline        10.62 K - 1.34x slower +24.14 μs

##### With input 100KiB invalid@end #####
Name                ips        average  deviation         median         99th %
Optimised       14.25 K       70.16 μs     ±4.38%       69.75 μs       79.29 μs
Baseline        10.64 K       93.97 μs     ±5.43%       93.46 μs      105.17 μs

Comparison:
Optimised       14.25 K
Baseline        10.64 K - 1.34x slower +23.81 μs

##### With input 100KiB invalid@mid #####
Name                ips        average  deviation         median         99th %
Optimised       28.63 K       34.92 μs     ±6.93%       34.67 μs       41.38 μs
Baseline        21.40 K       46.73 μs     ±5.16%       46.42 μs       54.10 μs

Comparison:
Optimised       28.63 K
Baseline        21.40 K - 1.34x slower +11.81 μs

##### With input 100KiB valid (random, ~3.1% -_) #####
Name                ips        average  deviation         median         99th %
Optimised       14.25 K       70.15 μs     ±4.36%       69.71 μs       79.42 μs
Baseline        10.66 K       93.80 μs     ±2.71%       93.33 μs      105.00 μs

Comparison:
Optimised       14.25 K
Baseline        10.66 K - 1.34x slower +23.65 μs

##### With input 1KiB alnum valid (0% -_) #####
Name                ips        average  deviation         median         99th %
Optimised        1.25 M        0.80 μs   ±238.04%        0.79 μs        0.96 μs
Baseline         0.96 M        1.04 μs   ±275.58%           1 μs        1.25 μs

Comparison:
Optimised        1.25 M
Baseline         0.96 M - 1.30x slower +0.24 μs

##### With input 1KiB invalid@end #####
Name                ips        average  deviation         median         99th %
Optimised        1.24 M        0.81 μs   ±229.56%        0.79 μs           1 μs
Baseline         0.96 M        1.04 μs   ±304.93%           1 μs        1.25 μs

Comparison:
Optimised        1.24 M
Baseline         0.96 M - 1.28x slower +0.23 μs

##### With input 1KiB invalid@mid #####
Name                ips        average  deviation         median         99th %
Optimised        2.27 M      439.93 ns   ±465.35%         417 ns         542 ns
Baseline         1.80 M      556.76 ns   ±458.28%         542 ns         667 ns

Comparison:
Optimised        2.27 M
Baseline         1.80 M - 1.27x slower +116.83 ns

##### With input 1KiB valid (random, ~3.1% -_) #####
Name                ips        average  deviation         median         99th %
Optimised        1.25 M        0.80 μs   ±230.72%        0.79 μs        0.96 μs
Baseline         0.97 M        1.03 μs   ±282.92%           1 μs        1.25 μs

Comparison:
Optimised        1.25 M
Baseline         0.97 M - 1.29x slower +0.23 μs

##### With input 1MiB alnum valid (0% -_) #####
Name                ips        average  deviation         median         99th %
Optimised        1.39 K      718.41 μs     ±6.13%      714.50 μs      754.58 μs
Baseline         1.03 K      968.36 μs     ±4.14%      966.17 μs     1012.95 μs

Comparison:
Optimised        1.39 K
Baseline         1.03 K - 1.35x slower +249.96 μs

##### With input 1MiB invalid@end #####
Name                ips        average  deviation         median         99th %
Optimised        1.39 K      718.11 μs     ±6.39%      714.38 μs      752.68 μs
Baseline         1.03 K      968.79 μs     ±4.02%      966.38 μs     1009.87 μs

Comparison:
Optimised        1.39 K
Baseline         1.03 K - 1.35x slower +250.68 μs

##### With input 1MiB invalid@mid #####
Name                ips        average  deviation         median         99th %
Optimised        2.77 K      361.35 μs    ±12.95%      357.50 μs      383.58 μs
Baseline         2.07 K      484.18 μs     ±3.90%      482.04 μs      511.53 μs

Comparison:
Optimised        2.77 K
Baseline         2.07 K - 1.34x slower +122.83 μs

##### With input 1MiB valid (random, ~3.1% -_) #####
Name                ips        average  deviation         median         99th %
Optimised        1.39 K      717.12 μs     ±3.04%      714.63 μs      754.39 μs
Baseline         1.03 K      969.16 μs     ±4.21%      966.15 μs     1021.50 μs

Comparison:
Optimised        1.39 K
Baseline         1.03 K - 1.35x slower +252.04 μs

@PJUllrich
Copy link
Copy Markdown
Contributor Author

Updated Benchmark
# SWAR exploration bench for Base.valid* functions.
#
# Run with the SYSTEM elixir from anywhere:
#   elixir bench_base_swar.ex                  # all benchmarks
#   elixir bench_base_swar.ex valid64          # just valid64?
#   elixir bench_base_swar.ex valid64 valid32  # both
#
# Recognised names: valid16, valid32, hex_valid32, valid64, url_valid64.
# Equivalence checks always run regardless of selection (they're fast).
#
# Self-contained: the BASELINE is a copy of the current valid* logic from
# lib/elixir/lib/base.ex, inlined into a `Baseline` module so the bench does
# not depend on `make stdlib`. The SWAR variants live in `Optimised`, in the
# same style as PR #15255.

# Benchee is the only dependency of this script.
Mix.install([{:benchee, "~> 1.5"}])

# Benchmark names passed on the command line; no arguments selects all of them.
selected = System.argv()

run? = fn
  _name when selected == [] -> true
  name -> name in selected
end

defmodule Baseline do
  @moduledoc false

  # Same base16 alphabet as lib/elixir/lib/base.ex.
  b16_alphabet = ~c"0123456789ABCDEF"

  # Rewrites every upper-case letter of a `{char, value}` table to lower case.
  to_lower_dec = fn pairs ->
    Enum.map(pairs, fn
      {char, value} when char in ?A..?Z -> {char - ?A + ?a, value}
      {_char, _value} = entry -> entry
    end)
  end

  # Keeps every entry and adds a lower-case twin after each upper-case letter.
  to_mixed_dec = fn pairs ->
    Enum.flat_map(pairs, fn
      {char, value} = entry when char in ?A..?Z -> [entry, {char - ?A + ?a, value}]
      {_char, _value} = entry -> [entry]
    end)
  end

  # Turns a `{char, value}` table into `{lowest_char, values}` where `values`
  # holds one slot per code point between the lowest and highest character
  # (nil for code points that are not part of the alphabet).
  to_decode_list = fn alphabet ->
    sorted = Enum.sort(alphabet)
    lookup = Map.new(sorted)
    {lowest, _} = List.first(sorted)
    {highest, _} = List.last(sorted)
    {lowest, for(char <- lowest..highest, do: Map.get(lookup, char))}
  end

  # Base16 check. Odd-length binaries can never be valid; for even lengths the
  # `:case` option (default `:upper`) selects the generated validator.
  def valid16?(string, opts \\ [])

  def valid16?(string, _opts) when is_binary(string) and rem(byte_size(string), 2) != 0 do
    false
  end

  def valid16?(string, opts) when is_binary(string) do
    char_case = Keyword.get(opts, :case, :upper)

    case char_case do
      :upper -> validate16upper?(string)
      :lower -> validate16lower?(string)
      :mixed -> validate16mixed?(string)
    end
  end

  # `{char, value}` table for the upper-case alphabet; the lower- and
  # mixed-case tables are derived from it in the comprehension below.
  upper = Enum.with_index(b16_alphabet)

  # Generates, per case, a private `validate16<case>?/1` loop and the
  # `valid_char16<case>?/1` predicate it relies on.
  for {base, alphabet} <- [upper: upper, lower: to_lower_dec.(upper), mixed: to_mixed_dec.(upper)] do
    validate_name = :"validate16#{base}?"
    valid_char_name = :"valid_char16#{base}?"

    # `decoded` holds the value (or nil) for every code point starting at `min`.
    {min, decoded} = to_decode_list.(alphabet)

    # Nothing left to check.
    defp unquote(validate_name)(<<>>), do: true

    # Unrolled: eight characters per step.
    defp unquote(validate_name)(<<c1, c2, c3, c4, c5, c6, c7, c8, rest::binary>>) do
      unquote(valid_char_name)(c1) and
        unquote(valid_char_name)(c2) and
        unquote(valid_char_name)(c3) and
        unquote(valid_char_name)(c4) and
        unquote(valid_char_name)(c5) and
        unquote(valid_char_name)(c6) and
        unquote(valid_char_name)(c7) and
        unquote(valid_char_name)(c8) and
        unquote(validate_name)(rest)
    end

    # Four characters per step once fewer than eight remain.
    defp unquote(validate_name)(<<c1, c2, c3, c4, rest::binary>>) do
      unquote(valid_char_name)(c1) and
        unquote(valid_char_name)(c2) and
        unquote(valid_char_name)(c3) and
        unquote(valid_char_name)(c4) and
        unquote(validate_name)(rest)
    end

    # Two characters per step once fewer than four remain.
    defp unquote(validate_name)(<<c1, c2, rest::binary>>) do
      unquote(valid_char_name)(c1) and
        unquote(valid_char_name)(c2) and
        unquote(validate_name)(rest)
    end

    # A single leftover character cannot form a base16 pair.
    defp unquote(validate_name)(<<_char, _rest::binary>>), do: false

    # Table lookup inside a guard: the character is valid when its slot in the
    # decode tuple is not nil. An index outside the tuple makes `elem/2` fail,
    # which fails the guard, so such characters reach the `false` clause.
    @compile {:inline, [{valid_char_name, 1}]}
    defp unquote(valid_char_name)(char)
         when elem({unquote_splicing(decoded)}, char - unquote(min)) != nil,
         do: true

    defp unquote(valid_char_name)(_char), do: false
  end

  # --- base32 (mirrors lib/elixir/lib/base.ex valid32? machinery) ----------

  # Standard and extended-hex base32 alphabets, followed by their
  # `{char, value}` tables (value = position in the alphabet).
  b32_alphabet = ~c"ABCDEFGHIJKLMNOPQRSTUVWXYZ234567"
  b32hex_alphabet = ~c"0123456789ABCDEFGHIJKLMNOPQRSTUV"
  upper32 = Enum.with_index(b32_alphabet)
  hexupper32 = Enum.with_index(b32hex_alphabet)

  # Standard-alphabet base32 check. `:padding` defaults to `true`, `:case` to
  # `:upper`; the case picks the generated validator.
  def valid32?(string, opts \\ []) when is_binary(string) do
    pad? = Keyword.get(opts, :padding, true)
    char_case = Keyword.get(opts, :case, :upper)

    case char_case do
      :upper -> validate32upper?(string, pad?)
      :lower -> validate32lower?(string, pad?)
      :mixed -> validate32mixed?(string, pad?)
    end
  end

  # Extended-hex base32 check. Same options as `valid32?/2`, dispatching to the
  # `validate32hex<case>?/2` validators instead.
  def hex_valid32?(string, opts \\ []) when is_binary(string) do
    pad? = Keyword.get(opts, :padding, true)
    char_case = Keyword.get(opts, :case, :upper)

    case char_case do
      :upper -> validate32hexupper?(string, pad?)
      :lower -> validate32hexlower?(string, pad?)
      :mixed -> validate32hexmixed?(string, pad?)
    end
  end

  # Generates, for each of the six (alphabet, case) combinations:
  #   * `validate32<base>?/2`      - entry point handling the final block
  #   * `validate_main32<base>?/1` - loop over the full 8-character blocks
  #   * `valid_char32<base>?/1`    - per-character table lookup
  for {base, alphabet} <- [
        upper: upper32,
        lower: to_lower_dec.(upper32),
        mixed: to_mixed_dec.(upper32),
        hexupper: hexupper32,
        hexlower: to_lower_dec.(hexupper32),
        hexmixed: to_mixed_dec.(hexupper32)
      ] do
    validate_name = :"validate32#{base}?"
    validate_main_name = :"validate_main32#{base}?"
    valid_char_name = :"valid_char32#{base}?"
    {min, decoded} = to_decode_list.(alphabet)

    defp unquote(validate_main_name)(<<>>), do: true

    # `main` is always a whole number of 8-byte blocks, so only the empty and
    # the 8-byte clause are needed.
    defp unquote(validate_main_name)(<<c1, c2, c3, c4, c5, c6, c7, c8, rest::binary>>) do
      unquote(valid_char_name)(c1) and
        unquote(valid_char_name)(c2) and
        unquote(valid_char_name)(c3) and
        unquote(valid_char_name)(c4) and
        unquote(valid_char_name)(c5) and
        unquote(valid_char_name)(c6) and
        unquote(valid_char_name)(c7) and
        unquote(valid_char_name)(c8) and
        unquote(validate_main_name)(rest)
    end

    # The empty string is valid regardless of padding.
    defp unquote(validate_name)(<<>>, _pad?), do: true

    defp unquote(validate_name)(string, pad?) do
      # Everything except the last block (1..8 bytes) goes to the main loop.
      segs = div(byte_size(string) + 7, 8) - 1
      <<main::size(^segs)-binary-unit(64), rest::binary>> = string
      main_valid? = unquote(validate_main_name)(main)

      case rest do
        _ when not main_valid? ->
          false

        # Full 8-byte final block with 6, 4, 3, 1 or 0 padding characters.
        <<c1, c2, ?=, ?=, ?=, ?=, ?=, ?=>> ->
          unquote(valid_char_name)(c1) and unquote(valid_char_name)(c2)

        <<c1, c2, c3, c4, ?=, ?=, ?=, ?=>> ->
          unquote(valid_char_name)(c1) and unquote(valid_char_name)(c2) and
            unquote(valid_char_name)(c3) and unquote(valid_char_name)(c4)

        <<c1, c2, c3, c4, c5, ?=, ?=, ?=>> ->
          unquote(valid_char_name)(c1) and unquote(valid_char_name)(c2) and
            unquote(valid_char_name)(c3) and unquote(valid_char_name)(c4) and
            unquote(valid_char_name)(c5)

        <<c1, c2, c3, c4, c5, c6, c7, ?=>> ->
          unquote(valid_char_name)(c1) and unquote(valid_char_name)(c2) and
            unquote(valid_char_name)(c3) and unquote(valid_char_name)(c4) and
            unquote(valid_char_name)(c5) and unquote(valid_char_name)(c6) and
            unquote(valid_char_name)(c7)

        <<c1, c2, c3, c4, c5, c6, c7, c8>> ->
          unquote(valid_char_name)(c1) and unquote(valid_char_name)(c2) and
            unquote(valid_char_name)(c3) and unquote(valid_char_name)(c4) and
            unquote(valid_char_name)(c5) and unquote(valid_char_name)(c6) and
            unquote(valid_char_name)(c7) and unquote(valid_char_name)(c8)

        # Short final blocks (2, 4, 5 or 7 bytes) are only accepted when
        # padding is not required.
        <<c1, c2>> when not pad? ->
          unquote(valid_char_name)(c1) and unquote(valid_char_name)(c2)

        <<c1, c2, c3, c4>> when not pad? ->
          unquote(valid_char_name)(c1) and unquote(valid_char_name)(c2) and
            unquote(valid_char_name)(c3) and unquote(valid_char_name)(c4)

        <<c1, c2, c3, c4, c5>> when not pad? ->
          unquote(valid_char_name)(c1) and unquote(valid_char_name)(c2) and
            unquote(valid_char_name)(c3) and unquote(valid_char_name)(c4) and
            unquote(valid_char_name)(c5)

        <<c1, c2, c3, c4, c5, c6, c7>> when not pad? ->
          unquote(valid_char_name)(c1) and unquote(valid_char_name)(c2) and
            unquote(valid_char_name)(c3) and unquote(valid_char_name)(c4) and
            unquote(valid_char_name)(c5) and unquote(valid_char_name)(c6) and
            unquote(valid_char_name)(c7)

        # Any other length or padding layout is invalid.
        _ ->
          false
      end
    end

    # Table lookup inside a guard: valid when the character's slot in the decode
    # tuple is not nil. An out-of-range index fails the guard, so the character
    # falls through to the `false` clause.
    @compile {:inline, [{valid_char_name, 1}]}
    defp unquote(valid_char_name)(char)
         when elem({unquote_splicing(decoded)}, char - unquote(min)) != nil,
         do: true

    defp unquote(valid_char_name)(_char), do: false
  end

  # --- base64 (mirrors lib/elixir/lib/base.ex valid64?/url_valid64? machinery)

  # Standard and URL-safe base64 alphabets; they differ only in the last two
  # characters ("+/" versus "-_").
  b64_alphabet = ~c"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
  b64url_alphabet = ~c"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"

  # Applies the `:ignore` option: `nil` keeps the input untouched, `:whitespace`
  # drops spaces, tabs, carriage returns and newlines.
  defp remove_ignored(string, nil) do
    string
  end

  defp remove_ignored(string, :whitespace) do
    for <<byte <- string>>, byte not in [?\s, ?\t, ?\r, ?\n], into: "", do: <<byte>>
  end

  # Standard base64 check. `:padding` defaults to `true`; `:ignore` is applied
  # before validation.
  def valid64?(string, opts \\ []) when is_binary(string) do
    pad? = Keyword.get(opts, :padding, true)
    stripped = remove_ignored(string, opts[:ignore])
    validate64base?(stripped, pad?)
  end

  # URL-safe base64 check. `:padding` defaults to `true`; `:ignore` is applied
  # before validation.
  def url_valid64?(string, opts \\ []) when is_binary(string) do
    pad? = Keyword.get(opts, :padding, true)
    stripped = remove_ignored(string, opts[:ignore])
    validate64url?(stripped, pad?)
  end

  # Generates, for the standard (`base`) and URL-safe (`url`) alphabets:
  #   * `validate64<base>?/2`      - entry point handling the final block
  #   * `validate_main64<base>?/1` - loop over the full 8-character blocks
  #   * `valid_char64<base>?/1`    - per-character table lookup
  for {base, alphabet} <- [base: b64_alphabet, url: b64url_alphabet] do
    validate_name = :"validate64#{base}?"
    validate_main_name = :"validate_main64#{base}?"
    valid_char_name = :"valid_char64#{base}?"
    {min, decoded} = alphabet |> Enum.with_index() |> to_decode_list.()

    defp unquote(validate_main_name)(<<>>), do: true

    # `main` is always a whole number of 8-byte blocks, so only the empty and
    # the 8-byte clause are needed.
    defp unquote(validate_main_name)(<<c1, c2, c3, c4, c5, c6, c7, c8, rest::binary>>) do
      unquote(valid_char_name)(c1) and
        unquote(valid_char_name)(c2) and
        unquote(valid_char_name)(c3) and
        unquote(valid_char_name)(c4) and
        unquote(valid_char_name)(c5) and
        unquote(valid_char_name)(c6) and
        unquote(valid_char_name)(c7) and
        unquote(valid_char_name)(c8) and
        unquote(validate_main_name)(rest)
    end

    # The empty string is valid regardless of padding.
    defp unquote(validate_name)(<<>>, _pad?), do: true

    defp unquote(validate_name)(string, pad?) do
      # Everything except the last block (1..8 bytes) goes to the main loop.
      segs = div(byte_size(string) + 7, 8) - 1
      <<main::size(^segs)-binary-unit(64), rest::binary>> = string
      main_valid? = unquote(validate_main_name)(main)

      case rest do
        _ when not main_valid? ->
          false

        # Final block of 4 bytes: two, one or no padding characters.
        <<c1, c2, ?=, ?=>> ->
          unquote(valid_char_name)(c1) and unquote(valid_char_name)(c2)

        <<c1, c2, c3, ?=>> ->
          unquote(valid_char_name)(c1) and unquote(valid_char_name)(c2) and
            unquote(valid_char_name)(c3)

        <<c1, c2, c3, c4>> ->
          unquote(valid_char_name)(c1) and unquote(valid_char_name)(c2) and
            unquote(valid_char_name)(c3) and unquote(valid_char_name)(c4)

        # Final block of 8 bytes: two, one or no padding characters.
        <<c1, c2, c3, c4, c5, c6, ?=, ?=>> ->
          unquote(valid_char_name)(c1) and unquote(valid_char_name)(c2) and
            unquote(valid_char_name)(c3) and unquote(valid_char_name)(c4) and
            unquote(valid_char_name)(c5) and unquote(valid_char_name)(c6)

        <<c1, c2, c3, c4, c5, c6, c7, ?=>> ->
          unquote(valid_char_name)(c1) and unquote(valid_char_name)(c2) and
            unquote(valid_char_name)(c3) and unquote(valid_char_name)(c4) and
            unquote(valid_char_name)(c5) and unquote(valid_char_name)(c6) and
            unquote(valid_char_name)(c7)

        <<c1, c2, c3, c4, c5, c6, c7, c8>> ->
          unquote(valid_char_name)(c1) and unquote(valid_char_name)(c2) and
            unquote(valid_char_name)(c3) and unquote(valid_char_name)(c4) and
            unquote(valid_char_name)(c5) and unquote(valid_char_name)(c6) and
            unquote(valid_char_name)(c7) and unquote(valid_char_name)(c8)

        # Final blocks of 2, 3, 6 or 7 bytes are only accepted when padding is
        # not required.
        <<c1, c2>> when not pad? ->
          unquote(valid_char_name)(c1) and unquote(valid_char_name)(c2)

        <<c1, c2, c3>> when not pad? ->
          unquote(valid_char_name)(c1) and unquote(valid_char_name)(c2) and
            unquote(valid_char_name)(c3)

        <<c1, c2, c3, c4, c5, c6>> when not pad? ->
          unquote(valid_char_name)(c1) and unquote(valid_char_name)(c2) and
            unquote(valid_char_name)(c3) and unquote(valid_char_name)(c4) and
            unquote(valid_char_name)(c5) and unquote(valid_char_name)(c6)

        <<c1, c2, c3, c4, c5, c6, c7>> when not pad? ->
          unquote(valid_char_name)(c1) and unquote(valid_char_name)(c2) and
            unquote(valid_char_name)(c3) and unquote(valid_char_name)(c4) and
            unquote(valid_char_name)(c5) and unquote(valid_char_name)(c6) and
            unquote(valid_char_name)(c7)

        # Any other length or padding layout is invalid.
        _ ->
          false
      end
    end

    # Table lookup inside a guard: valid when the character's slot in the decode
    # tuple is not nil. An out-of-range index fails the guard, so the character
    # falls through to the `false` clause.
    @compile {:inline, [{valid_char_name, 1}]}
    defp unquote(valid_char_name)(char)
         when elem({unquote_splicing(decoded)}, char - unquote(min)) != nil,
         do: true

    defp unquote(valid_char_name)(_char), do: false
  end
end

defmodule Optimised do
  @moduledoc false
  import Bitwise

  # SWAR-optimised variants of valid16?/2, valid32?/2, hex_valid32?/2,
  # valid64?/2 and url_valid64?/2.
  #
  # Words are 56 bits (7 bytes): the widest whole-byte width that still fits
  # in a BEAM small integer (fixnums are 60 bits wide, sign included, on
  # 64-bit OTP). At 64 bits, every `w + 0x80..` would allocate a bignum on the
  # heap and the optimisation collapses.
  # See https://github.com/erlang/otp/pull/10938.
  # NOTE(review): the PR description cites erlang/otp#10948 — confirm which
  # number is the intended reference.
  #
  # High bit of each of the 7 lanes.
  @swar_mask80 0x80808080808080

  # Per-range SWAR constants, broadcast across 7 lanes. Naming convention:
  #   @swar_ge_X = 0x80 - X  → (w + @swar_ge_X) has high bit set iff byte ≥ X
  #   @swar_gt_X = 0x7F - X  → (w + @swar_gt_X) has high bit set iff byte > X
  # A byte is in range [lo, hi] iff (bxor(w + @swar_ge_lo, w + @swar_gt_hi))
  # has its high bit set.
  # Both statements assume every byte of `w` is < 0x80, so that a lane sum
  # never exceeds 0xFF and cannot carry into the next lane.
  @swar_ge_0 0x50505050505050
  @swar_gt_9 0x46464646464646
  @swar_ge_2 0x4E4E4E4E4E4E4E
  @swar_gt_7 0x48484848484848
  @swar_ge_A 0x3F3F3F3F3F3F3F
  @swar_gt_F 0x39393939393939
  @swar_gt_V 0x29292929292929
  @swar_gt_Z 0x25252525252525
  @swar_ge_a 0x1F1F1F1F1F1F1F
  @swar_gt_f 0x19191919191919
  @swar_gt_v 0x09090909090909
  @swar_gt_z 0x05050505050505

  # Constants for detecting the base64 singletons (+, -, _), each broadcast
  # across 7 lanes. `bxor(w, K*ones)` zeroes exactly the lanes whose byte is K.
  #
  # NOTE(review): the simplified Mycroft form `bxor(w, K*ones) - 0x01..01`
  # (no `bnot V` term) is NOT exact per lane. The borrow out of a lane equal
  # to K flows into the lane above it (the preceding byte), so a preceding
  # byte equal to `K xor 0x01` also gets its high bit set. Those bytes ('*'
  # for '+', ',' for '-', '^' for '_') are outside the base64 alphabet, which
  # means an invalid byte is reported as valid, e.g. "*+" within one word.
  # Pattern follows https://github.com/elixir-lang/elixir/pull/15255.
  @swar_mask01 0x01010101010101
  @swar_plus_x7 0x2B2B2B2B2B2B2B
  @swar_dash_x7 0x2D2D2D2D2D2D2D
  @swar_under_x7 0x5F5F5F5F5F5F5F

  # For base64 standard, '/' (0x2F) sits exactly one below '0' (0x30), so we
  # extend the digit range to [0x2F, 0x39] which catches '/' as part of one
  # range check — saves one singleton check. Trick lifted from
  # https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
  @swar_ge_slash 0x51515151515151

  # Per-byte validity. One guard per (encoding, case). Used by the per-byte
  # fallback clauses of the validation loops and by the clauses that check
  # the final, possibly padded, block.
  defguardp valid_char16upper?(c) when c in ?0..?9 or c in ?A..?F
  defguardp valid_char16lower?(c) when c in ?0..?9 or c in ?a..?f
  defguardp valid_char16mixed?(c) when c in ?0..?9 or c in ?A..?F or c in ?a..?f

  defguardp valid_char32upper?(c) when c in ?A..?Z or c in ?2..?7
  defguardp valid_char32lower?(c) when c in ?a..?z or c in ?2..?7
  defguardp valid_char32mixed?(c) when c in ?A..?Z or c in ?a..?z or c in ?2..?7

  # Most common range first, to keep the average number of comparisons in
  # the per-byte checks low. In hex base32, letters dominate (22/32) over
  # digits (10/32), so letters go first.
  defguardp valid_char32hexupper?(c) when c in ?A..?V or c in ?0..?9
  defguardp valid_char32hexlower?(c) when c in ?a..?v or c in ?0..?9
  defguardp valid_char32hexmixed?(c) when c in ?A..?V or c in ?a..?v or c in ?0..?9

  # base64 alphabets have 3 ranges (A-Z, a-z, 0-9) + 2 singletons. The SWAR
  # word guards further down cover the singletons as well. Order here:
  # letters most common (~82%), digits (~16%), singletons (~3%).
  defguardp valid_char64base?(c)
            when c in ?A..?Z or c in ?a..?z or c in ?0..?9 or c == ?+ or c == ?/

  defguardp valid_char64url?(c)
            when c in ?A..?Z or c in ?a..?z or c in ?0..?9 or c == ?- or c == ?_

  # Word-level (7 bytes at a time) validity, one guard per (encoding, case).
  #
  # Every guard has the same two parts:
  #   1. ASCII gate, `(w &&& MASK80) == 0`: all bytes are < 0x80, so the lane
  #      additions below can never carry into a neighbouring lane.
  #   2. Range gate: for each allowed range [lo, hi] the mask
  #          bxor(w + @swar_ge_lo, w + @swar_gt_hi)
  #      has the high bit set in exactly the lanes whose byte lies in
  #      [lo, hi]. The masks are OR-ed together and all 7 high bits must be set.
  defguardp valid_word16upper?(w)
            when (w &&& @swar_mask80) == 0 and
                   ((bxor(w + @swar_ge_0, w + @swar_gt_9) |||
                       bxor(w + @swar_ge_A, w + @swar_gt_F)) &&& @swar_mask80) == @swar_mask80

  defguardp valid_word16lower?(w)
            when (w &&& @swar_mask80) == 0 and
                   ((bxor(w + @swar_ge_0, w + @swar_gt_9) |||
                       bxor(w + @swar_ge_a, w + @swar_gt_f)) &&& @swar_mask80) == @swar_mask80

  defguardp valid_word16mixed?(w)
            when (w &&& @swar_mask80) == 0 and
                   ((bxor(w + @swar_ge_0, w + @swar_gt_9) |||
                       bxor(w + @swar_ge_A, w + @swar_gt_F) |||
                       bxor(w + @swar_ge_a, w + @swar_gt_f)) &&& @swar_mask80) == @swar_mask80

  # Standard base32 words: letters (A-Z and/or a-z) plus the digits 2-7.
  # Same ASCII gate + OR-ed range masks as the base16 word guards.
  defguardp valid_word32upper?(w)
            when (w &&& @swar_mask80) == 0 and
                   ((bxor(w + @swar_ge_A, w + @swar_gt_Z) |||
                       bxor(w + @swar_ge_2, w + @swar_gt_7)) &&& @swar_mask80) == @swar_mask80

  defguardp valid_word32lower?(w)
            when (w &&& @swar_mask80) == 0 and
                   ((bxor(w + @swar_ge_a, w + @swar_gt_z) |||
                       bxor(w + @swar_ge_2, w + @swar_gt_7)) &&& @swar_mask80) == @swar_mask80

  defguardp valid_word32mixed?(w)
            when (w &&& @swar_mask80) == 0 and
                   ((bxor(w + @swar_ge_A, w + @swar_gt_Z) |||
                       bxor(w + @swar_ge_a, w + @swar_gt_z) |||
                       bxor(w + @swar_ge_2, w + @swar_gt_7)) &&& @swar_mask80) == @swar_mask80

  # Extended-hex base32 words: the digits 0-9 plus the letters A-V and/or a-v.
  # Same ASCII gate + OR-ed range masks as the base16 word guards.
  defguardp valid_word32hexupper?(w)
            when (w &&& @swar_mask80) == 0 and
                   ((bxor(w + @swar_ge_0, w + @swar_gt_9) |||
                       bxor(w + @swar_ge_A, w + @swar_gt_V)) &&& @swar_mask80) == @swar_mask80

  defguardp valid_word32hexlower?(w)
            when (w &&& @swar_mask80) == 0 and
                   ((bxor(w + @swar_ge_0, w + @swar_gt_9) |||
                       bxor(w + @swar_ge_a, w + @swar_gt_v)) &&& @swar_mask80) == @swar_mask80

  defguardp valid_word32hexmixed?(w)
            when (w &&& @swar_mask80) == 0 and
                   ((bxor(w + @swar_ge_0, w + @swar_gt_9) |||
                       bxor(w + @swar_ge_A, w + @swar_gt_V) |||
                       bxor(w + @swar_ge_a, w + @swar_gt_v)) &&& @swar_mask80) == @swar_mask80

  # SWAR word guards for the two base64 alphabets.
  #
  # Ranges (A-Z, a-z, digits) use the same `bxor(w + ge_lo, w + gt_hi)` mask as
  # the other word guards. For the standard alphabet the digit range is
  # widened to [0x2F, 0x39] so that it also absorbs '/' (0x2F).
  #
  # The remaining singletons ('+', '-', '_') need an equality mask that is
  # exact per lane. The shortcut `bxor(w, K*ones) - 0x01..01` is not: the
  # borrow out of a lane equal to K flows into the lane above it, so a
  # preceding byte equal to `K xor 1` ('*' before '+', ',' before '-', '^'
  # before '_') was flagged as valid too, and e.g. "AAAAA*+" passed. We use
  #
  #     bxor(bxor(w, K*ones) + 0x7F..7F, 0x80..80)
  #
  # instead. After the ASCII gate every XOR-ed lane is in 0..0x7F, so adding
  # 0x7F never carries into the next lane and sets the lane's high bit iff the
  # lane is non-zero; the outer XOR flips that bit, leaving it set iff the
  # byte equals K. All intermediate values stay within 56 bits.
  @swar_mask7f @swar_mask80 - @swar_mask01

  # Standard alphabet: A-Z, a-z, [/0-9] and '+'.
  defguardp valid_word64base?(w)
            when band(w, @swar_mask80) == 0 and
                   band(
                     bor(
                       bor(
                         bor(
                           bxor(w + @swar_ge_A, w + @swar_gt_Z),
                           bxor(w + @swar_ge_a, w + @swar_gt_z)
                         ),
                         bxor(w + @swar_ge_slash, w + @swar_gt_9)
                       ),
                       bxor(bxor(w, @swar_plus_x7) + @swar_mask7f, @swar_mask80)
                     ),
                     @swar_mask80
                   ) == @swar_mask80

  # URL-safe alphabet: A-Z, a-z, 0-9, '-' and '_'.
  defguardp valid_word64url?(w)
            when band(w, @swar_mask80) == 0 and
                   band(
                     bor(
                       bor(
                         bor(
                           bxor(w + @swar_ge_A, w + @swar_gt_Z),
                           bxor(w + @swar_ge_a, w + @swar_gt_z)
                         ),
                         bxor(w + @swar_ge_0, w + @swar_gt_9)
                       ),
                       bor(
                         bxor(bxor(w, @swar_dash_x7) + @swar_mask7f, @swar_mask80),
                         bxor(bxor(w, @swar_under_x7) + @swar_mask7f, @swar_mask80)
                       )
                     ),
                     @swar_mask80
                   ) == @swar_mask80

  # =========================================================================
  # base16
  # =========================================================================

  # Odd-length binaries can never be valid base16; for even lengths the
  # `:case` option (default `:upper`) selects the SWAR validator.
  def valid16?(string, opts \\ [])

  def valid16?(string, _opts) when is_binary(string) and rem(byte_size(string), 2) != 0 do
    false
  end

  def valid16?(string, opts) when is_binary(string) do
    char_case = Keyword.get(opts, :case, :upper)

    case char_case do
      :upper -> validate16upper?(string)
      :lower -> validate16lower?(string)
      :mixed -> validate16mixed?(string)
    end
  end

  # --- :upper -------------------------------------------------------------

  # Fast path: check seven bytes at once through the SWAR word guard.
  defp validate16upper?(<<word::56, tail::binary>>) do
    valid_word16upper?(word) and validate16upper?(tail)
  end

  # Every byte has been consumed.
  defp validate16upper?(<<>>) do
    true
  end

  # Fewer than seven bytes left: one byte at a time.
  defp validate16upper?(<<byte, tail::binary>>) do
    valid_char16upper?(byte) and validate16upper?(tail)
  end

  # --- :lower -------------------------------------------------------------

  # Fast path: check seven bytes at once through the SWAR word guard.
  defp validate16lower?(<<word::56, tail::binary>>) do
    valid_word16lower?(word) and validate16lower?(tail)
  end

  # Every byte has been consumed.
  defp validate16lower?(<<>>) do
    true
  end

  # Fewer than seven bytes left: one byte at a time.
  defp validate16lower?(<<byte, tail::binary>>) do
    valid_char16lower?(byte) and validate16lower?(tail)
  end

  # --- :mixed -------------------------------------------------------------

  # Fast path: check seven bytes at once through the SWAR word guard.
  defp validate16mixed?(<<word::56, tail::binary>>) do
    valid_word16mixed?(word) and validate16mixed?(tail)
  end

  # Every byte has been consumed.
  defp validate16mixed?(<<>>) do
    true
  end

  # Fewer than seven bytes left: one byte at a time.
  defp validate16mixed?(<<byte, tail::binary>>) do
    valid_char16mixed?(byte) and validate16mixed?(tail)
  end

  # =========================================================================
  # base32
  # =========================================================================

  # Standard-alphabet base32 check. `:padding` defaults to `true`, `:case` to
  # `:upper`; the case picks the generated validator.
  def valid32?(string, opts \\ []) when is_binary(string) do
    pad? = Keyword.get(opts, :padding, true)
    char_case = Keyword.get(opts, :case, :upper)

    case char_case do
      :upper -> validate32upper?(string, pad?)
      :lower -> validate32lower?(string, pad?)
      :mixed -> validate32mixed?(string, pad?)
    end
  end

  # Extended-hex base32 check. Same options as `valid32?/2`, dispatching to the
  # `validate32hex<case>?/2` validators instead.
  def hex_valid32?(string, opts \\ []) when is_binary(string) do
    pad? = Keyword.get(opts, :padding, true)
    char_case = Keyword.get(opts, :case, :upper)

    case char_case do
      :upper -> validate32hexupper?(string, pad?)
      :lower -> validate32hexlower?(string, pad?)
      :mixed -> validate32hexmixed?(string, pad?)
    end
  end

  # The base32 stride mirrors Baseline: split into `main` (multiple of 8 bytes)
  # and a `rest` (1-8 bytes) so the last block's padding patterns are handled
  # per-byte. SWAR only fast-paths the `main` loop. Same machinery covers
  # both the regular alphabet (`:upper`/`:lower`/`:mixed`) and the
  # extended-hex alphabet (`:hexupper`/`:hexlower`/`:hexmixed`).
  for {case_name, char_guard, word_guard} <- [
        {:upper, :valid_char32upper?, :valid_word32upper?},
        {:lower, :valid_char32lower?, :valid_word32lower?},
        {:mixed, :valid_char32mixed?, :valid_word32mixed?},
        {:hexupper, :valid_char32hexupper?, :valid_word32hexupper?},
        {:hexlower, :valid_char32hexlower?, :valid_word32hexlower?},
        {:hexmixed, :valid_char32hexmixed?, :valid_word32hexmixed?}
      ] do
    validate_name = :"validate32#{case_name}?"
    validate_main_name = :"validate_main32#{case_name}?"

    # Whole-string validator for one base32 alphabet variant.
    #
    # The input is split into `main` (every complete 8-byte group except the
    # last one) and `rest` (the final 1..8 bytes). `=` is only matched as
    # padding inside `rest`; `main` is handed to `validate_main_name`.
    # `pad?` says whether padding is mandatory: the bare 2/4/5/7-character
    # tails are accepted only when it is false.

    # An empty string is valid in either padding mode.
    defp unquote(validate_name)(<<>>, _pad?), do: true

    defp unquote(validate_name)(string, pad?) do
      # For a non-empty input this always leaves 1..8 bytes in `rest`, even
      # when the size is an exact multiple of 8.
      segs = div(byte_size(string) + 7, 8) - 1
      <<main::size(^segs)-binary-unit(64), rest::binary>> = string
      # `main` is scanned before the tail is inspected.
      main_valid? = unquote(validate_main_name)(main)

      case rest do
        # A bad `main` fails the input whatever the tail looks like.
        _ when not main_valid? ->
          false

        # Padded tails: 2, 4, 5 or 7 characters followed by `=` up to 8 bytes.
        # These clauses come before the plain 8-character clause so trailing
        # `=` bytes are matched as padding instead of reaching the character
        # guard.
        <<c1, c2, ?=, ?=, ?=, ?=, ?=, ?=>> ->
          unquote(char_guard)(c1) and unquote(char_guard)(c2)

        <<c1, c2, c3, c4, ?=, ?=, ?=, ?=>> ->
          unquote(char_guard)(c1) and unquote(char_guard)(c2) and
            unquote(char_guard)(c3) and unquote(char_guard)(c4)

        <<c1, c2, c3, c4, c5, ?=, ?=, ?=>> ->
          unquote(char_guard)(c1) and unquote(char_guard)(c2) and
            unquote(char_guard)(c3) and unquote(char_guard)(c4) and
            unquote(char_guard)(c5)

        <<c1, c2, c3, c4, c5, c6, c7, ?=>> ->
          unquote(char_guard)(c1) and unquote(char_guard)(c2) and
            unquote(char_guard)(c3) and unquote(char_guard)(c4) and
            unquote(char_guard)(c5) and unquote(char_guard)(c6) and
            unquote(char_guard)(c7)

        # Full final group: every one of the 8 bytes goes through the
        # character guard.
        <<c1, c2, c3, c4, c5, c6, c7, c8>> ->
          unquote(char_guard)(c1) and unquote(char_guard)(c2) and
            unquote(char_guard)(c3) and unquote(char_guard)(c4) and
            unquote(char_guard)(c5) and unquote(char_guard)(c6) and
            unquote(char_guard)(c7) and unquote(char_guard)(c8)

        # Unpadded tails, accepted only when padding is optional.
        <<c1, c2>> when not pad? ->
          unquote(char_guard)(c1) and unquote(char_guard)(c2)

        <<c1, c2, c3, c4>> when not pad? ->
          unquote(char_guard)(c1) and unquote(char_guard)(c2) and
            unquote(char_guard)(c3) and unquote(char_guard)(c4)

        <<c1, c2, c3, c4, c5>> when not pad? ->
          unquote(char_guard)(c1) and unquote(char_guard)(c2) and
            unquote(char_guard)(c3) and unquote(char_guard)(c4) and
            unquote(char_guard)(c5)

        <<c1, c2, c3, c4, c5, c6, c7>> when not pad? ->
          unquote(char_guard)(c1) and unquote(char_guard)(c2) and
            unquote(char_guard)(c3) and unquote(char_guard)(c4) and
            unquote(char_guard)(c5) and unquote(char_guard)(c6) and
            unquote(char_guard)(c7)

        # Any other tail length or padding layout is invalid.
        _ ->
          false
      end
    end

    # Validates `main`, the part of the input that precedes the final group.
    #
    # Clause order matters: while at least 7 bytes remain they are read as a
    # single 56-bit integer and checked with the word guard; once fewer than
    # 7 bytes are left, they are checked one at a time with the character
    # guard. `and` short-circuits, so scanning stops at the first failure.
    defp unquote(validate_main_name)(<<w::56, rest::binary>>),
      do: unquote(word_guard)(w) and unquote(validate_main_name)(rest)

    # Everything consumed without a failure.
    defp unquote(validate_main_name)(<<>>), do: true

    # Scalar fallback for the 1..6 bytes that do not fill a 56-bit word.
    defp unquote(validate_main_name)(<<char, rest::binary>>),
      do: unquote(char_guard)(char) and unquote(validate_main_name)(rest)
  end

  # =========================================================================
  # base64
  # =========================================================================

  # Strips the characters selected by the `:ignore` option before validation.
  # With `nil` (option absent) the input is passed through untouched.
  defp remove_ignored(string, nil), do: string

  # `:whitespace` drops every space, tab, carriage return and newline byte.
  defp remove_ignored(string, :whitespace) do
    for <<byte <- string>>,
        byte != ?\s and byte != ?\t and byte != ?\r and byte != ?\n,
        into: <<>>,
        do: <<byte>>
  end

  # Checks whether `string` is valid standard-alphabet base64.
  #
  # Options read here: `:padding` (defaults to `true`) and `:ignore`
  # (`nil` or `:whitespace`, see `remove_ignored/2`).
  def valid64?(string, opts \\ []) when is_binary(string) do
    padding_required? = Keyword.get(opts, :padding, true)
    ignored = opts[:ignore]

    validate64base?(remove_ignored(string, ignored), padding_required?)
  end

  # Checks whether `string` is valid URL-safe base64.
  #
  # Options read here: `:padding` (defaults to `true`) and `:ignore`
  # (`nil` or `:whitespace`, see `remove_ignored/2`).
  def url_valid64?(string, opts \\ []) when is_binary(string) do
    padding_required? = Keyword.get(opts, :padding, true)
    ignored = opts[:ignore]

    validate64url?(remove_ignored(string, ignored), padding_required?)
  end

  # Same dispatch shape as base32: split into `main` (a whole number of 8-byte
  # groups) and `rest` (the final 1..8 bytes, the only place `=` padding is
  # matched). `main` is consumed 56 bits (7 bytes) at a time by the word guard;
  # per the original note, that SWAR check covers the singleton characters via
  # Mycroft, so singletons need no scalar fallback. The per-byte clause of the
  # main loop only handles the fewer-than-7 bytes left at the end of `main`.
  for {variant, char_guard, word_guard} <- [
        {:base, :valid_char64base?, :valid_word64base?},
        {:url, :valid_char64url?, :valid_word64url?}
      ] do
    # Names of the two private functions generated for this variant.
    validate_name = :"validate64#{variant}?"
    validate_main_name = :"validate_main64#{variant}?"

    # An empty string is valid in either padding mode.
    defp unquote(validate_name)(<<>>, _pad?), do: true

    defp unquote(validate_name)(string, pad?) do
      # For a non-empty input this always leaves 1..8 bytes in `rest`, even
      # when the size is an exact multiple of 8.
      segs = div(byte_size(string) + 7, 8) - 1
      <<main::size(^segs)-binary-unit(64), rest::binary>> = string
      # `main` is scanned before the tail is inspected.
      main_valid? = unquote(validate_main_name)(main)

      case rest do
        # A bad `main` fails the input whatever the tail looks like.
        _ when not main_valid? ->
          false

        # 4-byte tail: one base64 quantum, with two, one or no `=`. The `=`
        # clauses come first so padding never reaches the character guard.
        <<c1, c2, ?=, ?=>> ->
          unquote(char_guard)(c1) and unquote(char_guard)(c2)

        <<c1, c2, c3, ?=>> ->
          unquote(char_guard)(c1) and unquote(char_guard)(c2) and unquote(char_guard)(c3)

        <<c1, c2, c3, c4>> ->
          unquote(char_guard)(c1) and unquote(char_guard)(c2) and
            unquote(char_guard)(c3) and unquote(char_guard)(c4)

        # 8-byte tail: two quanta, where only the second may carry padding.
        <<c1, c2, c3, c4, c5, c6, ?=, ?=>> ->
          unquote(char_guard)(c1) and unquote(char_guard)(c2) and
            unquote(char_guard)(c3) and unquote(char_guard)(c4) and
            unquote(char_guard)(c5) and unquote(char_guard)(c6)

        <<c1, c2, c3, c4, c5, c6, c7, ?=>> ->
          unquote(char_guard)(c1) and unquote(char_guard)(c2) and
            unquote(char_guard)(c3) and unquote(char_guard)(c4) and
            unquote(char_guard)(c5) and unquote(char_guard)(c6) and
            unquote(char_guard)(c7)

        <<c1, c2, c3, c4, c5, c6, c7, c8>> ->
          unquote(char_guard)(c1) and unquote(char_guard)(c2) and
            unquote(char_guard)(c3) and unquote(char_guard)(c4) and
            unquote(char_guard)(c5) and unquote(char_guard)(c6) and
            unquote(char_guard)(c7) and unquote(char_guard)(c8)

        # Unpadded tails (2, 3, 6 or 7 characters), accepted only when
        # padding is optional.
        <<c1, c2>> when not pad? ->
          unquote(char_guard)(c1) and unquote(char_guard)(c2)

        <<c1, c2, c3>> when not pad? ->
          unquote(char_guard)(c1) and unquote(char_guard)(c2) and unquote(char_guard)(c3)

        <<c1, c2, c3, c4, c5, c6>> when not pad? ->
          unquote(char_guard)(c1) and unquote(char_guard)(c2) and
            unquote(char_guard)(c3) and unquote(char_guard)(c4) and
            unquote(char_guard)(c5) and unquote(char_guard)(c6)

        <<c1, c2, c3, c4, c5, c6, c7>> when not pad? ->
          unquote(char_guard)(c1) and unquote(char_guard)(c2) and
            unquote(char_guard)(c3) and unquote(char_guard)(c4) and
            unquote(char_guard)(c5) and unquote(char_guard)(c6) and
            unquote(char_guard)(c7)

        # Any other tail length (1 or 5 bytes) or padding layout is invalid.
        _ ->
          false
      end
    end

    # Main loop. Clause order matters: 7 bytes at a time are read as one
    # 56-bit integer for the word guard; `and` short-circuits on failure.
    defp unquote(validate_main_name)(<<w::56, rest::binary>>),
      do: unquote(word_guard)(w) and unquote(validate_main_name)(rest)

    # Everything consumed without a failure.
    defp unquote(validate_main_name)(<<>>), do: true

    # Scalar fallback for the 1..6 bytes that do not fill a 56-bit word.
    defp unquote(validate_main_name)(<<char, rest::binary>>),
      do: unquote(char_guard)(char) and unquote(validate_main_name)(rest)
  end
end

# Benchmark payload sizes as `{label, byte_count}` pairs.
sizes =
  for {label, kib} <- [{"1KiB", 1}, {"100KiB", 100}, {"1MiB", 1024}],
      do: {label, kib * 1024}

# valid16? inputs, keyed by a human-readable label. Every value is
# `{binary, case_opt}` so the Baseline and Optimised scenarios receive the
# same arguments and Benchee lines them up per input row.
inputs =
  sizes
  |> Enum.flat_map(fn {size_label, n} ->
    data = :crypto.strong_rand_bytes(n)
    upper = Base.encode16(data)
    lower = Base.encode16(data, case: :lower)

    # Overwrites the single byte at offset `pos` with `char`.
    put_byte = fn bin, pos, char ->
      binary_part(bin, 0, pos) <>
        char <> binary_part(bin, pos + 1, byte_size(bin) - pos - 1)
    end

    # Mixed case: letters sitting at odd offsets become lowercase.
    mixed =
      for {c, i} <- Enum.with_index(:binary.bin_to_list(upper)), into: <<>> do
        if c in ?A..?F and rem(i, 2) == 1, do: <<c - ?A + ?a>>, else: <<c>>
      end

    # Slow-path shapes (upper only; 'Z' is invalid in all three case modes).
    invalid_at_end = put_byte.(upper, byte_size(upper) - 1, "Z")
    invalid_at_mid = put_byte.(upper, div(byte_size(upper), 2), "Z")

    [
      {"#{size_label} upper valid", {upper, :upper}},
      {"#{size_label} lower valid", {lower, :lower}},
      {"#{size_label} mixed valid", {mixed, :mixed}},
      {"#{size_label} upper invalid@end", {invalid_at_end, :upper}},
      {"#{size_label} upper invalid@mid", {invalid_at_mid, :upper}}
    ]
  end)
  |> Map.new()

# Both implementations must agree with the stdlib Base on every input.
Enum.each(inputs, fn {label, {bin, case_opt}} ->
  stdlib = Base.valid16?(bin, case: case_opt)
  baseline = Baseline.valid16?(bin, case: case_opt)
  optimised = Optimised.valid16?(bin, case: case_opt)

  if stdlib !== baseline do
    raise "Baseline mismatch on #{label}: stdlib=#{stdlib} baseline=#{baseline}"
  end

  if stdlib !== optimised do
    raise "Optimised mismatch on #{label}: stdlib=#{stdlib} optimised=#{optimised}"
  end
end)

IO.puts("# valid16? equivalence check passed.\n")

# --- base32 inputs ------------------------------------------------------

inputs32 =
  sizes
  |> Enum.flat_map(fn {size_label, n} ->
    data = :crypto.strong_rand_bytes(n)
    upper = Base.encode32(data)
    lower = Base.encode32(data, case: :lower)

    # Overwrites the single byte at offset `pos` with `char`.
    put_byte = fn bin, pos, char ->
      binary_part(bin, 0, pos) <>
        char <> binary_part(bin, pos + 1, byte_size(bin) - pos - 1)
    end

    # Mixed case: letters sitting at odd offsets become lowercase.
    mixed =
      for {c, i} <- Enum.with_index(:binary.bin_to_list(upper)), into: <<>> do
        if c in ?A..?Z and rem(i, 2) == 1, do: <<c - ?A + ?a>>, else: <<c>>
      end

    # Slow path: '!' (0x21) is invalid in every base32 alphabet. The last
    # byte may be a `=` pad; overwriting it is fine, the input is still
    # expected to be rejected by both implementations.
    invalid_at_end = put_byte.(upper, byte_size(upper) - 1, "!")
    invalid_at_mid = put_byte.(upper, div(byte_size(upper), 2), "!")

    [
      {"#{size_label} upper valid", {upper, :upper}},
      {"#{size_label} lower valid", {lower, :lower}},
      {"#{size_label} mixed valid", {mixed, :mixed}},
      {"#{size_label} upper invalid@end", {invalid_at_end, :upper}},
      {"#{size_label} upper invalid@mid", {invalid_at_mid, :upper}}
    ]
  end)
  |> Map.new()

# Both implementations must agree with the stdlib Base on every input.
Enum.each(inputs32, fn {label, {bin, case_opt}} ->
  stdlib = Base.valid32?(bin, case: case_opt)
  baseline = Baseline.valid32?(bin, case: case_opt)
  optimised = Optimised.valid32?(bin, case: case_opt)

  if stdlib !== baseline do
    raise "Baseline valid32? mismatch on #{label}: stdlib=#{stdlib} baseline=#{baseline}"
  end

  if stdlib !== optimised do
    raise "Optimised valid32? mismatch on #{label}: stdlib=#{stdlib} optimised=#{optimised}"
  end
end)

IO.puts("# valid32? equivalence check passed.\n")

# --- base32 hex inputs --------------------------------------------------

inputs32hex =
  sizes
  |> Enum.flat_map(fn {size_label, n} ->
    data = :crypto.strong_rand_bytes(n)
    upper = Base.hex_encode32(data)
    lower = Base.hex_encode32(data, case: :lower)

    # Overwrites the single byte at offset `pos` with `char`.
    put_byte = fn bin, pos, char ->
      binary_part(bin, 0, pos) <>
        char <> binary_part(bin, pos + 1, byte_size(bin) - pos - 1)
    end

    # Mixed case: letters sitting at odd offsets become lowercase.
    mixed =
      for {c, i} <- Enum.with_index(:binary.bin_to_list(upper)), into: <<>> do
        if c in ?A..?V and rem(i, 2) == 1, do: <<c - ?A + ?a>>, else: <<c>>
      end

    # Slow path: '!' (0x21) is invalid in every base32hex alphabet too.
    invalid_at_end = put_byte.(upper, byte_size(upper) - 1, "!")
    invalid_at_mid = put_byte.(upper, div(byte_size(upper), 2), "!")

    [
      {"#{size_label} upper valid", {upper, :upper}},
      {"#{size_label} lower valid", {lower, :lower}},
      {"#{size_label} mixed valid", {mixed, :mixed}},
      {"#{size_label} upper invalid@end", {invalid_at_end, :upper}},
      {"#{size_label} upper invalid@mid", {invalid_at_mid, :upper}}
    ]
  end)
  |> Map.new()

# Both implementations must agree with the stdlib Base on every input.
Enum.each(inputs32hex, fn {label, {bin, case_opt}} ->
  stdlib = Base.hex_valid32?(bin, case: case_opt)
  baseline = Baseline.hex_valid32?(bin, case: case_opt)
  optimised = Optimised.hex_valid32?(bin, case: case_opt)

  if stdlib !== baseline do
    raise "Baseline hex_valid32? mismatch on #{label}: stdlib=#{stdlib} baseline=#{baseline}"
  end

  if stdlib !== optimised do
    raise "Optimised hex_valid32? mismatch on #{label}: stdlib=#{stdlib} optimised=#{optimised}"
  end
end)

IO.puts("# hex_valid32? equivalence check passed.\n")

# --- base64 inputs ------------------------------------------------------
#
# Encoding random bytes via Base.encode64/url_encode64 yields a uniform
# distribution over the 64-char alphabet — each character position has a
# 2/64 = 3.125% chance of being a "singleton" (`+`/`/` for base, `-`/`_`
# for url). This matches the real-world distribution of base64-encoded
# binary data (random/compressed payloads, tokens, etc.).
#
# We also generate an "alnum valid" variant: a synthetic input drawn only
# from `A-Z, a-z, 0-9` (0% singletons). This isolates how much of the
# overall cost depends on singletons specifically — useful when comparing
# "include singletons in SWAR" vs "alnum-only SWAR + scalar fallback".

# The 62 alphanumeric base64 characters, i.e. the alphabet without the two
# singleton characters of either variant.
alnum64_alphabet = ~c"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"

# Makes the byte size of `binary` a multiple of 4 so it has a valid padded
# base64 shape: a remainder of 2 or 3 is filled with `=`, while a remainder
# of 1 (never a valid base64 length) is fixed by dropping the last byte.
alnum_pad_to_4 = fn binary ->
  case rem(byte_size(binary), 4) do
    0 -> binary
    1 -> binary_part(binary, 0, byte_size(binary) - 1)
    2 -> binary <> "=="
    3 -> binary <> "="
  end
end

# Returns `target_len` random alphanumeric characters, adjusted by
# `alnum_pad_to_4` to a multiple of 4 bytes.
alnum_string = fn target_len ->
  # The explicit `//1` step keeps the range empty for `target_len == 0`;
  # a bare `1..0` counts downwards and would emit two characters.
  for(_ <- 1..target_len//1, into: <<>>, do: <<Enum.random(alnum64_alphabet)>>)
  |> alnum_pad_to_4.()
end

# valid64? inputs, keyed by label: the random encoding, an alphanumeric-only
# string of the same length, and two corrupted copies of the encoding.
inputs64base =
  Map.new(
    Enum.flat_map(sizes, fn {size_label, n} ->
      base = n |> :crypto.strong_rand_bytes() |> Base.encode64()
      alnum = alnum_string.(byte_size(base))

      last = byte_size(base) - 1
      mid = div(byte_size(base), 2)

      # Slow-path: '!' (0x21) is invalid in every base64 alphabet.
      invalid_at_end = binary_part(base, 0, last) <> "!"

      invalid_at_mid =
        binary_part(base, 0, mid) <> "!" <> binary_part(base, mid + 1, last - mid)

      [
        {"#{size_label} valid (random, ~3.1% +/)", base},
        {"#{size_label} alnum valid (0% +/)", alnum},
        {"#{size_label} invalid@end", invalid_at_end},
        {"#{size_label} invalid@mid", invalid_at_mid}
      ]
    end)
  )

# url_valid64? inputs, keyed by label: the random encoding, an
# alphanumeric-only string of the same length, and two corrupted copies.
inputs64url =
  Map.new(
    Enum.flat_map(sizes, fn {size_label, n} ->
      url = n |> :crypto.strong_rand_bytes() |> Base.url_encode64()
      alnum = alnum_string.(byte_size(url))

      last = byte_size(url) - 1
      mid = div(byte_size(url), 2)

      # '!' is used as the invalid byte, as for the standard alphabet inputs.
      invalid_at_end = binary_part(url, 0, last) <> "!"

      invalid_at_mid =
        binary_part(url, 0, mid) <> "!" <> binary_part(url, mid + 1, last - mid)

      [
        {"#{size_label} valid (random, ~3.1% -_)", url},
        {"#{size_label} alnum valid (0% -_)", alnum},
        {"#{size_label} invalid@end", invalid_at_end},
        {"#{size_label} invalid@mid", invalid_at_mid}
      ]
    end)
  )

# Prints how many `+` / `/` bytes the 1 MiB "valid" sample contains, so the
# bench output documents what that input is actually made of.
inputs64base
|> Enum.filter(fn {label, _bin} -> String.starts_with?(label, "1MiB valid") end)
|> Enum.each(fn {label, bin} ->
  count =
    bin
    |> :binary.bin_to_list()
    |> Enum.count(&(&1 == ?+ or &1 == ?/))

  pct = Float.round(count / byte_size(bin) * 100, 3)
  IO.puts("# inputs64base #{label}: #{count} +/ in #{byte_size(bin)} bytes (#{pct}%)")
end)

# Both implementations must agree with the stdlib Base on every base64 input.
Enum.each(inputs64base, fn {label, bin} ->
  stdlib = Base.valid64?(bin)
  baseline = Baseline.valid64?(bin)
  optimised = Optimised.valid64?(bin)

  if stdlib !== baseline do
    raise "Baseline valid64? mismatch on #{label}: stdlib=#{stdlib} baseline=#{baseline}"
  end

  if stdlib !== optimised do
    raise "Optimised valid64? mismatch on #{label}: stdlib=#{stdlib} optimised=#{optimised}"
  end
end)

# Same check for the URL-safe alphabet.
Enum.each(inputs64url, fn {label, bin} ->
  stdlib = Base.url_valid64?(bin)
  baseline = Baseline.url_valid64?(bin)
  optimised = Optimised.url_valid64?(bin)

  if stdlib !== baseline do
    raise "Baseline url_valid64? mismatch on #{label}: stdlib=#{stdlib} baseline=#{baseline}"
  end

  if stdlib !== optimised do
    raise "Optimised url_valid64? mismatch on #{label}: stdlib=#{stdlib} optimised=#{optimised}"
  end
end)

IO.puts("# valid64? / url_valid64? equivalence check passed.\n")

# Benchmark table: `{selection name, inputs, scenarios}`. The entries are
# visited in order and each one runs only when `run?` selects its name.
[
  {"valid16", inputs,
   %{
     "Baseline" => fn {bin, mode} -> Baseline.valid16?(bin, case: mode) end,
     "Optimised" => fn {bin, mode} -> Optimised.valid16?(bin, case: mode) end
   }},
  {"valid32", inputs32,
   %{
     "Baseline" => fn {bin, mode} -> Baseline.valid32?(bin, case: mode) end,
     "Optimised" => fn {bin, mode} -> Optimised.valid32?(bin, case: mode) end
   }},
  {"hex_valid32", inputs32hex,
   %{
     "Baseline" => fn {bin, mode} -> Baseline.hex_valid32?(bin, case: mode) end,
     "Optimised" => fn {bin, mode} -> Optimised.hex_valid32?(bin, case: mode) end
   }},
  {"valid64", inputs64base,
   %{
     "Baseline" => fn bin -> Baseline.valid64?(bin) end,
     "Optimised" => fn bin -> Optimised.valid64?(bin) end
   }},
  {"url_valid64", inputs64url,
   %{
     "Baseline" => fn bin -> Baseline.url_valid64?(bin) end,
     "Optimised" => fn bin -> Optimised.url_valid64?(bin) end
   }}
]
|> Enum.each(fn {name, bench_inputs, scenarios} ->
  if run?.(name) do
    IO.puts("\n========== #{name}? benchmark ==========\n")

    Benchee.run(
      scenarios,
      inputs: bench_inputs,
      warmup: 2,
      time: 5,
      print: [fast_warning: false, benchmarking: false]
    )
  end
end)

@PJUllrich PJUllrich changed the title Add SWAR versions of Base.valid16? and Base.valid32? Add SWAR versions of Base validations May 11, 2026
@josevalim josevalim merged commit 7081e72 into elixir-lang:main May 11, 2026
15 checks passed
@josevalim
Copy link
Copy Markdown
Member

💚 💙 💜 💛 ❤️

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Development

Successfully merging this pull request may close these issues.

2 participants