Skip to content

Commit 15a7ec2

Browse files
support other types
1 parent 844d528 commit 15a7ec2

1 file changed

Lines changed: 107 additions & 95 deletions

File tree

src/boyer_moore.zig

Lines changed: 107 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -1,125 +1,137 @@
11
const std = @import("std");
22
const Allocator = std.mem.Allocator;
3+
const assert = std.debug.assert;
34
const testing = std.testing;
45

5-
/// Returns the length of the longest common suffix of `a` and `b`
6-
fn longestCommonSuffix(a: []const u8, b: []const u8) usize {
7-
var i: usize = 0;
8-
while (i < a.len and i < b.len) : (i += 1) {
9-
if (a[(a.len - 1) - i] != b[(b.len - 1) - i]) {
10-
break;
6+
pub fn StringFinder(comptime T: type) type {
7+
assert(std.meta.trait.isIndexable(T));
8+
const ElemType = std.meta.Elem(T);
9+
assert(@typeInfo(ElemType) == .Int);
10+
11+
return struct {
12+
allocator: ?*Allocator,
13+
pattern: T,
14+
bad_char: [possible_values]usize,
15+
good_suffix: []usize,
16+
17+
const possible_values = possibleValues(ElemType);
18+
const Self = @This();
19+
20+
/// An empty pattern, requires no allocations
21+
pub const empty = Self{
22+
.allocator = null,
23+
.pattern = "",
24+
.bad_char = [_]usize{0} ** possible_values,
25+
.good_suffix = &[_]usize{},
26+
};
27+
28+
/// Returns the number of possible values for an integer type
29+
fn possibleValues(comptime Type: type) usize {
30+
return std.math.maxInt(Type) - std.math.minInt(Type) + 1;
1131
}
12-
}
13-
return i;
14-
}
1532

16-
pub const StringFinder = struct {
17-
allocator: ?*Allocator,
18-
pattern: []const u8,
19-
bad_char: [possible_values]usize,
20-
good_suffix: []usize,
21-
22-
const possible_values = std.math.maxInt(u8) + 1;
23-
const Self = @This();
24-
25-
/// An empty pattern, requires no allocations
26-
pub const empty = Self{
27-
.allocator = null,
28-
.pattern = "",
29-
.bad_char = [_]usize{0} ** possible_values,
30-
.good_suffix = &[_]usize{},
31-
};
33+
/// Returns the length of the longest common suffix of `a` and `b`
34+
fn longestCommonSuffix(a: T, b: T) usize {
35+
var i: usize = 0;
36+
while (i < a.len and i < b.len) : (i += 1) {
37+
if (a[(a.len - 1) - i] != b[(b.len - 1) - i]) {
38+
break;
39+
}
40+
}
41+
return i;
42+
}
3243

33-
/// Initializes a StringFinder with a pattern
34-
pub fn init(allocator: *Allocator, pattern: []const u8) !Self {
35-
if (pattern.len == 0) return Self.empty;
44+
/// Initializes a StringFinder with a pattern
45+
pub fn init(allocator: *Allocator, pattern: T) !Self {
46+
if (pattern.len == 0) return Self.empty;
3647

37-
var self = Self{
38-
.allocator = allocator,
39-
.pattern = pattern,
40-
.bad_char = undefined,
41-
.good_suffix = try allocator.alloc(usize, pattern.len),
42-
};
48+
var self = Self{
49+
.allocator = allocator,
50+
.pattern = pattern,
51+
.bad_char = undefined,
52+
.good_suffix = try allocator.alloc(usize, pattern.len),
53+
};
4354

44-
const last = pattern.len - 1;
55+
const last = pattern.len - 1;
4556

46-
// Initialize bad character rule table
47-
for (self.bad_char) |*x| x.* = pattern.len;
57+
// Initialize bad character rule table
58+
for (self.bad_char) |*x| x.* = pattern.len;
4859

49-
for (pattern) |x, i| {
50-
if (i == last) break;
51-
self.bad_char[x] = last - i;
52-
}
60+
for (pattern) |x, i| {
61+
if (i == last) break;
62+
self.bad_char[x] = last - i;
63+
}
5364

54-
// Build good suffix rule table
55-
var last_prefix = last;
65+
// Build good suffix rule table
66+
var last_prefix = last;
67+
68+
// First pass
69+
{
70+
var i = last + 1;
71+
while (i > 0) : (i -= 1) {
72+
if (std.mem.startsWith(ElemType, pattern, pattern[i..])) {
73+
last_prefix = i;
74+
}
75+
self.good_suffix[i - 1] = last_prefix + last - (i - 1);
76+
}
77+
}
5678

57-
// First pass
58-
{
59-
var i = last + 1;
60-
while (i > 0) : (i -= 1) {
61-
if (std.mem.startsWith(u8, pattern, pattern[i..])) {
62-
last_prefix = i;
79+
// Second pass
80+
{
81+
var i: usize = 0;
82+
while (i < last) : (i += 1) {
83+
const len_suffix = longestCommonSuffix(pattern, pattern[1 .. i + 1]);
84+
if (pattern[i - len_suffix] != pattern[last - len_suffix]) {
85+
self.good_suffix[last - len_suffix] = len_suffix + last - i;
86+
}
6387
}
64-
self.good_suffix[i - 1] = last_prefix + last - (i - 1);
6588
}
89+
90+
return self;
6691
}
6792

68-
// Second pass
69-
{
70-
var i: usize = 0;
71-
while (i < last) : (i += 1) {
72-
const len_suffix = longestCommonSuffix(pattern, pattern[1 .. i + 1]);
73-
if (pattern[i - len_suffix] != pattern[last - len_suffix]) {
74-
self.good_suffix[last - len_suffix] = len_suffix + last - i;
75-
}
93+
/// Frees all memory allocated by this string searcher
94+
pub fn deinit(self: *Self) void {
95+
if (self.allocator) |allocator| {
96+
allocator.free(self.good_suffix);
7697
}
7798
}
7899

79-
return self;
80-
}
100+
/// Returns the index of the first occurrence of the pattern in the
101+
/// text. Returns null if the pattern wasn't found
102+
pub fn next(self: Self, text: T) ?usize {
103+
var i: usize = self.pattern.len;
104+
while (i <= text.len) {
105+
// Try to match starting from the end of the pattern
106+
var j: usize = self.pattern.len;
107+
while (j > 0 and text[i - 1] == self.pattern[j - 1]) {
108+
i -= 1;
109+
j -= 1;
110+
}
81111

82-
/// Frees all memory allocated by this string searcher
83-
pub fn deinit(self: *Self) void {
84-
if (self.allocator) |allocator| {
85-
allocator.free(self.good_suffix);
86-
}
87-
}
88-
89-
/// Returns the index of the first occurrence of the pattern in the
90-
/// text. Returns null if the pattern wasn't found
91-
pub fn next(self: Self, text: []const u8) ?usize {
92-
var i: usize = self.pattern.len;
93-
while (i <= text.len) {
94-
// Try to match starting from the end of the pattern
95-
var j: usize = self.pattern.len;
96-
while (j > 0 and text[i - 1] == self.pattern[j - 1]) {
97-
i -= 1;
98-
j -= 1;
99-
}
112+
// If we matched until the beginning of the pattern, we
113+
// have a match
114+
if (j == 0) {
115+
return i;
116+
}
100117

101-
// If we matched until the beginning of the pattern, we
102-
// have a match
103-
if (j == 0) {
104-
return i;
118+
// Use the bad character table and the good suffix table
119+
// to advance our position
120+
i += std.math.max(
121+
self.bad_char[text[i - 1]],
122+
self.good_suffix[j - 1],
123+
);
105124
}
106125

107-
// Use the bad character table and the good suffix table
108-
// to advance our position
109-
i += std.math.max(
110-
self.bad_char[text[i - 1]],
111-
self.good_suffix[j - 1],
112-
);
126+
return null;
113127
}
114-
115-
return null;
116-
}
117-
};
128+
};
129+
}
118130

119131
test "empty pattern" {
120132
const allocator = testing.allocator;
121133

122-
var sf = try StringFinder.init(allocator, "");
134+
var sf = try StringFinder([]const u8).init(allocator, "");
123135
defer sf.deinit();
124136

125137
testing.expectEqual(@as(?usize, 0), sf.next("zig"));
@@ -131,7 +143,7 @@ test "empty pattern" {
131143
test "pattern with length 1" {
132144
const allocator = testing.allocator;
133145

134-
var sf = try StringFinder.init(allocator, "a");
146+
var sf = try StringFinder([]const u8).init(allocator, "a");
135147
defer sf.deinit();
136148

137149
testing.expectEqual(@as(?usize, null), sf.next("zig"));
@@ -143,7 +155,7 @@ test "pattern with length 1" {
143155
test "test matching" {
144156
const allocator = testing.allocator;
145157

146-
var sf = try StringFinder.init(allocator, "zig");
158+
var sf = try StringFinder([]const u8).init(allocator, "zig");
147159
defer sf.deinit();
148160

149161
testing.expectEqual(@as(?usize, 0), sf.next("zig"));

0 commit comments

Comments
 (0)