Skip to content

Commit d964455

Browse files
add Bitap algorithm; misc. improvements
1 parent acc0dab commit d964455

4 files changed

Lines changed: 82 additions & 5 deletions

File tree

src/bitap.zig

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
/// Bitap algorithm (exact matching variant)
2+
///
3+
/// See https://en.wikipedia.org/wiki/Bitap_algorithm for more details
4+
const std = @import("std");
5+
const assert = std.debug.assert;
6+
const testing = std.testing;
7+
8+
const test_suites = @import("test_cases.zig").test_suites;
9+
const possibleValues = @import("common.zig").possibleValues;
10+
11+
/// Searches `text` for the first occurrence of `pattern` using the exact-match
/// Bitap (shift-or) algorithm.
///
/// `T` must be an indexable type with integer elements, and `pattern.len`
/// must not exceed `max_pattern_length`; both conditions are asserted.
///
/// Returns the index in `text` at which the first match starts, `0` for an
/// empty pattern, or `null` when `pattern` does not occur in `text`.
pub fn bitap(
    comptime T: type,
    comptime max_pattern_length: comptime_int,
    text: T,
    pattern: T,
) ?usize {
    assert(std.meta.trait.isIndexable(T));
    const ElemType = std.meta.Elem(T);
    assert(@typeInfo(ElemType) == .Int);
    assert(pattern.len <= max_pattern_length);

    // State word: `max_pattern_length + 1` bits, so that bit `pattern.len`
    // (the bit tested for a match below) always exists.
    const Int = @Type(std.builtin.TypeInfo{
        .Int = .{
            .is_signed = false,
            .bits = max_pattern_length + 1,
        },
    });
    // Shift-amount type for `Int`. `std.math.Log2Int` rounds up, so every
    // value in 0...max_pattern_length fits. A floor log2 of the bit count is
    // one bit too narrow whenever the bit count is not a power of two
    // (e.g. max_pattern_length = 59 or 67), which made the casts below fail
    // for long but still valid patterns.
    const ShiftInt = std.math.Log2Int(Int);
    const possible_values = possibleValues(ElemType);

    if (pattern.len == 0) return 0;
    const len = @intCast(ShiftInt, pattern.len);

    const one: Int = 1;

    // Bit 0 starts cleared, all other bits start set.
    var R: Int = ~one;
    // One mask per possible element value; bit `i` is cleared when
    // `pattern[i]` equals that value.
    var pattern_mask = [_]Int{std.math.maxInt(Int)} ** possible_values;

    for (pattern) |x, i| {
        pattern_mask[x] &= ~(one << @intCast(ShiftInt, i));
    }

    for (text) |x, i| {
        R |= pattern_mask[x];
        R <<= 1;

        // Bit `len` being clear means the last `len` elements of `text`
        // matched the whole pattern.
        if ((R & (one << len)) == 0) {
            // Guard keeps `i - len` from underflowing when the match ends
            // at index `len - 1`, i.e. starts at index 0.
            return if (i < len) 0 else i - len + 1;
        }
    }

    return null;
}
59+
60+
test "bitap" {
    // Run every shared suite under several maximum pattern lengths,
    // including 59 and 67, whose state-word bit counts (60 and 68) are not
    // powers of two.
    inline for (&[_]comptime_int{ 31, 63, 127, 59, 67 }) |max_len| {
        for (test_suites) |suite| {
            for (suite.cases) |case| {
                const actual = bitap([]const u8, max_len, case.text, suite.pattern);
                testing.expectEqual(case.expected, actual);
            }
        }
    }
}

src/boyer_moore.zig

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ const assert = std.debug.assert;
44
const testing = std.testing;
55

66
const test_suites = @import("test_cases.zig").test_suites;
7+
const possibleValues = @import("common.zig").possibleValues;
78

89
pub fn StringFinder(comptime T: type) type {
910
assert(std.meta.trait.isIndexable(T));
@@ -27,11 +28,6 @@ pub fn StringFinder(comptime T: type) type {
2728
.good_suffix = &[_]usize{},
2829
};
2930

30-
/// Returns the number of possible values for an integer type
31-
fn possibleValues(comptime Type: type) usize {
32-
return std.math.maxInt(Type) - std.math.minInt(Type) + 1;
33-
}
34-
3531
/// Returns the maximum length of suffixes of both strings
3632
fn longestCommonSuffix(a: T, b: T) usize {
3733
var i: usize = 0;

src/common.zig

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
const std = @import("std");
2+
3+
/// Counts the distinct values representable by the integer type `Type`,
/// i.e. the size of the inclusive range from its minimum to its maximum.
pub fn possibleValues(comptime Type: type) comptime_int {
    const lowest = std.math.minInt(Type);
    const highest = std.math.maxInt(Type);
    return highest - lowest + 1;
}

src/main.zig

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1+
pub const bitap = @import("bitap.zig");
12
pub const boyer_moore = @import("boyer_moore.zig");
23

34
test "" {
    // Reference each algorithm module so the tests declared inside it are
    // picked up when this root file is tested.
    _ = bitap;
    _ = boyer_moore;
}

0 commit comments

Comments
 (0)