Skip to content

Commit 85e6444

Browse files
Initial commit
0 parents  commit 85e6444

4 files changed

Lines changed: 177 additions & 0 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
zig-cache/

README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# zig-boyer-moore
2+
3+
Implementation of the Boyer-Moore string-search algorithm in
4+
[Zig](https://ziglang.org). Ported from the implementation in the Go
5+
standard library:
6+
[strings/search.go](https://golang.org/src/strings/search.go).

build.zig

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
const Builder = @import("std").build.Builder;
2+
3+
/// Build script entry point.
///
/// Declares the `zig-boyer-moore` static library built from `src/main.zig`
/// and a `test` step that depends on the tests of that same source file.
/// Both artifacts use the build mode chosen via the standard release options.
pub fn build(b: *Builder) void {
    const root_source = "src/main.zig";
    const mode = b.standardReleaseOptions();

    // Static library artifact, registered for installation.
    const static_lib = b.addStaticLibrary("zig-boyer-moore", root_source);
    static_lib.setBuildMode(mode);
    static_lib.install();

    // Test artifact for the library's `test` blocks.
    const unit_tests = b.addTest(root_source);
    unit_tests.setBuildMode(mode);

    // `zig build test` runs the test artifact.
    b.step("test", "Run library tests").dependOn(&unit_tests.step);
}

src/main.zig

Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
const std = @import("std");
2+
const Allocator = std.mem.Allocator;
3+
const testing = std.testing;
4+
5+
/// Returns the number of trailing bytes that `a` and `b` have in common,
/// i.e. the length of their longest shared suffix.
///
/// The result never exceeds the length of the shorter slice and is 0 when
/// either slice is empty or the final bytes differ.
fn longestCommonSuffix(a: []const u8, b: []const u8) usize {
    // At most `limit` bytes can be compared without running off either slice.
    const limit = if (a.len < b.len) a.len else b.len;

    var matched: usize = 0;
    // Walk both slices backwards from their last byte while they agree.
    while (matched < limit and a[a.len - 1 - matched] == b[b.len - 1 - matched]) {
        matched += 1;
    }
    return matched;
}
15+
16+
/// Boyer-Moore searcher for a fixed byte pattern.
///
/// `init` precomputes two shift tables for the pattern; `next` then uses
/// them to locate the first occurrence of the pattern in a text. The pattern
/// slice is stored as given (it is not copied), so it has to stay valid for
/// as long as the finder is used.
pub const StringFinder = struct {
    /// Allocator that owns `good_suffix`; null for the `empty` finder, which
    /// holds no allocation.
    allocator: ?*Allocator,
    /// The pattern being searched for.
    pattern: []const u8,
    /// Bad-character table, indexed by byte value: the distance from the
    /// last occurrence of that byte in the pattern (final position excluded)
    /// to the end of the pattern, or `pattern.len` if the byte does not
    /// occur there.
    bad_char: [possible_values]usize,
    /// Good-suffix table, indexed by the pattern position at which a
    /// mismatch occurred. Has `pattern.len` entries.
    good_suffix: []usize,

    /// Number of distinct values a `u8` can take.
    const possible_values = std.math.maxInt(u8) + 1;
    const Self = @This();

    /// Finder for the empty pattern; needs no allocation.
    pub const empty = Self{
        .allocator = null,
        .pattern = "",
        .bad_char = [_]usize{0} ** possible_values,
        .good_suffix = &[_]usize{},
    };

    /// Creates a finder for `pattern`.
    ///
    /// Allocates `pattern.len` entries for the good-suffix table with
    /// `allocator`; release them with `deinit`. An empty pattern returns
    /// `empty` without allocating. Fails only if the allocation fails.
    pub fn init(allocator: *Allocator, pattern: []const u8) !Self {
        if (pattern.len == 0) return Self.empty;

        const last = pattern.len - 1;

        var finder = Self{
            .allocator = allocator,
            .pattern = pattern,
            .bad_char = undefined,
            .good_suffix = try allocator.alloc(usize, pattern.len),
        };

        // Bad-character table: bytes absent from the pattern allow a shift
        // by the whole pattern length.
        for (finder.bad_char) |*shift| shift.* = pattern.len;

        // Every byte except the final one records its distance to the end of
        // the pattern; later occurrences overwrite earlier ones.
        for (pattern[0..last]) |byte, offset| {
            finder.bad_char[byte] = last - offset;
        }

        // Good-suffix table, first pass: walk the suffixes from shortest to
        // longest, remembering the start of the most recent suffix that is
        // also a prefix of the pattern. Each entry combines that shift with
        // the length of the suffix following the entry's position.
        var prefix_shift = last;
        var suffix_start = pattern.len;
        while (suffix_start != 0) {
            if (std.mem.startsWith(u8, pattern, pattern[suffix_start..])) {
                prefix_shift = suffix_start;
            }
            suffix_start -= 1;
            finder.good_suffix[suffix_start] = prefix_shift + last - suffix_start;
        }

        // Good-suffix table, second pass: for every position `end` before
        // the last one, find how much of the pattern's suffix also ends at
        // `end`. If the bytes preceding the two copies differ, record the
        // shift for a mismatch just before that suffix.
        var end: usize = 0;
        while (end < last) : (end += 1) {
            const common = longestCommonSuffix(pattern, pattern[1 .. end + 1]);
            const mismatch_at = last - common;
            if (pattern[end - common] != pattern[mismatch_at]) {
                finder.good_suffix[mismatch_at] = common + last - end;
            }
        }

        return finder;
    }

    /// Releases the good-suffix table. Does nothing for a finder that has
    /// no allocator (the `empty` finder).
    pub fn deinit(self: *Self) void {
        const allocator = self.allocator orelse return;
        allocator.free(self.good_suffix);
    }

    /// Returns the index of the first occurrence of the pattern in `text`,
    /// or null if the pattern does not occur. The empty pattern is found at
    /// index 0 of any text.
    pub fn next(self: Self, text: []const u8) ?usize {
        const pattern_len = self.pattern.len;

        // `cursor` is one past the text index currently being compared; it
        // starts aligned with the end of the pattern.
        var cursor: usize = pattern_len;
        while (cursor <= text.len) {
            // Compare backwards from the end of the pattern; `unmatched`
            // counts the pattern bytes that still have to be confirmed.
            var unmatched: usize = pattern_len;
            while (unmatched > 0) {
                if (text[cursor - 1] != self.pattern[unmatched - 1]) break;
                cursor -= 1;
                unmatched -= 1;
            }

            // Whole pattern confirmed: `cursor` now sits on the match start.
            if (unmatched == 0) return cursor;

            // Mismatch: advance by the larger of the two table shifts.
            const by_bad_char = self.bad_char[text[cursor - 1]];
            const by_good_suffix = self.good_suffix[unmatched - 1];
            cursor += std.math.max(by_bad_char, by_good_suffix);
        }

        return null;
    }
};
118+
119+
test "empty pattern" {
    var finder = try StringFinder.init(testing.allocator, "");
    defer finder.deinit();

    // The empty pattern is found at index 0 of every text, including the
    // empty text.
    const texts = [_][]const u8{ "zig", "", "a", "lang" };
    for (texts) |text| {
        testing.expectEqual(@as(?usize, 0), finder.next(text));
    }
}
130+
131+
test "pattern with length 1" {
    var finder = try StringFinder.init(testing.allocator, "a");
    defer finder.deinit();

    // Each case pairs a text with the expected index of the first "a".
    const Case = struct {
        text: []const u8,
        expected: ?usize,
    };
    const cases = [_]Case{
        .{ .text = "zig", .expected = null },
        .{ .text = "", .expected = null },
        .{ .text = "a", .expected = 0 },
        .{ .text = "lang", .expected = 1 },
    };

    for (cases) |case| {
        testing.expectEqual(case.expected, finder.next(case.text));
    }
}
142+
143+
test "test matching" {
    var finder = try StringFinder.init(testing.allocator, "zig");
    defer finder.deinit();

    // Each case pairs a text with the expected index of the first "zig".
    const Case = struct {
        text: []const u8,
        expected: ?usize,
    };
    const cases = [_]Case{
        .{ .text = "zig", .expected = 0 },
        .{ .text = "ziglang", .expected = 0 },
        .{ .text = "langzig", .expected = 4 },
        .{ .text = "langziglang", .expected = 4 },
        .{ .text = "", .expected = null },
        .{ .text = "firefox", .expected = null },
        .{ .text = "abc abc ziglang", .expected = 8 },
    };

    for (cases) |case| {
        testing.expectEqual(case.expected, finder.next(case.text));
    }
}

0 commit comments

Comments
 (0)