Skip to content

Commit 844d528

Browse files
begin support for more algorithms
1 parent 1117424 commit 844d528

2 files changed

Lines changed: 159 additions & 154 deletions

File tree

src/boyer_moore.zig

Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
const std = @import("std");
2+
const Allocator = std.mem.Allocator;
3+
const testing = std.testing;
4+
5+
/// Returns the length of the longest suffix that `a` and `b` have in
/// common. The result is 0 when either slice is empty or when their last
/// bytes differ.
fn longestCommonSuffix(a: []const u8, b: []const u8) usize {
    // No more bytes than the shorter slice holds can ever match.
    const limit = if (a.len < b.len) a.len else b.len;

    var matched: usize = 0;
    // Walk both slices backwards from their ends while the bytes agree.
    while (matched < limit and a[a.len - 1 - matched] == b[b.len - 1 - matched]) {
        matched += 1;
    }
    return matched;
}
15+
16+
/// Searches texts for a fixed pattern using two precomputed skip tables
/// (a bad-character table and a good-suffix table).
///
/// The pattern slice is stored as given and is not copied, so it must stay
/// valid for as long as the finder is used.
pub const StringFinder = struct {
    /// Allocator that owns `good_suffix`; null for the `empty` finder,
    /// which holds no allocation.
    allocator: ?*Allocator,
    /// The pattern being searched for (borrowed from the caller).
    pattern: []const u8,
    /// Skip distance indexed by the text byte found at a mismatch.
    bad_char: [possible_values]usize,
    /// Skip distance indexed by the pattern position of a mismatch.
    good_suffix: []usize,

    /// Number of distinct values a `u8` can take (size of `bad_char`).
    const possible_values = std.math.maxInt(u8) + 1;
    const Self = @This();

    /// Finder for the empty pattern. It needs no allocation, and `deinit`
    /// on it frees nothing.
    pub const empty = Self{
        .allocator = null,
        .pattern = "",
        .bad_char = [_]usize{0} ** possible_values,
        .good_suffix = &[_]usize{},
    };

    /// Builds a finder for `pattern`.
    ///
    /// An empty pattern yields `empty` without touching `allocator`.
    /// Otherwise `pattern.len` entries are allocated for the good-suffix
    /// table; release them with `deinit`. Returns the allocator's error if
    /// that allocation fails.
    pub fn init(allocator: *Allocator, pattern: []const u8) !Self {
        if (pattern.len == 0) return Self.empty;

        // Index of the final pattern byte.
        const last = pattern.len - 1;

        var self = Self{
            .allocator = allocator,
            .pattern = pattern,
            .bad_char = undefined,
            .good_suffix = try allocator.alloc(usize, pattern.len),
        };

        // Bad character table: every byte starts out skipping a whole
        // pattern length.
        for (self.bad_char) |*skip| skip.* = pattern.len;

        // Bytes that occur in the pattern skip by their distance to the
        // final position. The final byte itself is excluded so that it
        // never receives a zero skip.
        for (pattern[0..last]) |byte, offset| {
            self.bad_char[byte] = last - offset;
        }

        // Good suffix table, first pass: walking from the back, remember
        // the most recently seen index at which the rest of the pattern is
        // also a prefix of the pattern.
        var last_prefix = last;
        var remaining = pattern.len;
        while (remaining > 0) : (remaining -= 1) {
            const idx = remaining - 1;
            if (std.mem.startsWith(u8, pattern, pattern[remaining..])) {
                last_prefix = remaining;
            }
            self.good_suffix[idx] = last_prefix + last - idx;
        }

        // Good suffix table, second pass: for every position before the
        // last, find how much of the pattern's suffix is repeated ending
        // there, and record the skip when the preceding bytes differ.
        var end: usize = 0;
        while (end < last) : (end += 1) {
            const shared = longestCommonSuffix(pattern, pattern[1 .. end + 1]);
            const slot = last - shared;
            if (pattern[end - shared] != pattern[slot]) {
                self.good_suffix[slot] = shared + last - end;
            }
        }

        return self;
    }

    /// Frees the good-suffix table. Does nothing for a finder that has no
    /// allocator (the `empty` finder).
    pub fn deinit(self: *Self) void {
        const allocator = self.allocator orelse return;
        allocator.free(self.good_suffix);
    }

    /// Returns the index of the first occurrence of the pattern in `text`,
    /// or null if the pattern does not occur. An empty pattern is found at
    /// index 0 of any text.
    pub fn next(self: Self, text: []const u8) ?usize {
        const pattern = self.pattern;

        // `text_end` is one past the text byte currently being compared;
        // it starts aligned with the end of the pattern.
        var text_end: usize = pattern.len;
        while (text_end <= text.len) {
            // Compare backwards from the end of the pattern.
            var pattern_end: usize = pattern.len;
            while (pattern_end > 0) {
                if (text[text_end - 1] != pattern[pattern_end - 1]) break;
                text_end -= 1;
                pattern_end -= 1;
            }

            // The whole pattern was consumed: `text_end` now sits on the
            // start of the match.
            if (pattern_end == 0) return text_end;

            // Advance by whichever table allows the larger skip.
            const by_char = self.bad_char[text[text_end - 1]];
            const by_suffix = self.good_suffix[pattern_end - 1];
            text_end += std.math.max(by_char, by_suffix);
        }

        return null;
    }
};
118+
119+
test "empty pattern" {
    var finder = try StringFinder.init(testing.allocator, "");
    defer finder.deinit();

    // The empty pattern is expected at index 0 of every text, including
    // the empty text.
    const texts = [_][]const u8{ "zig", "", "a", "lang" };
    for (texts) |text| {
        testing.expectEqual(@as(?usize, 0), finder.next(text));
    }
}
130+
131+
test "pattern with length 1" {
    var finder = try StringFinder.init(testing.allocator, "a");
    defer finder.deinit();

    // Each case pairs a text with the expected index of "a" in it
    // (null when the text does not contain it).
    const Case = struct {
        text: []const u8,
        expected: ?usize,
    };
    const cases = [_]Case{
        .{ .text = "zig", .expected = null },
        .{ .text = "", .expected = null },
        .{ .text = "a", .expected = 0 },
        .{ .text = "lang", .expected = 1 },
    };
    for (cases) |case| {
        testing.expectEqual(case.expected, finder.next(case.text));
    }
}
142+
143+
test "test matching" {
    var finder = try StringFinder.init(testing.allocator, "zig");
    defer finder.deinit();

    // Each case pairs a text with the expected index of the first "zig"
    // in it (null when the text does not contain it).
    const Case = struct {
        text: []const u8,
        expected: ?usize,
    };
    const cases = [_]Case{
        .{ .text = "zig", .expected = 0 },
        .{ .text = "ziglang", .expected = 0 },
        .{ .text = "langzig", .expected = 4 },
        .{ .text = "langziglang", .expected = 4 },
        .{ .text = "", .expected = null },
        .{ .text = "firefox", .expected = null },
        .{ .text = "abc abc ziglang", .expected = 8 },
    };
    for (cases) |case| {
        testing.expectEqual(case.expected, finder.next(case.text));
    }
}

src/main.zig

Lines changed: 3 additions & 154 deletions
Original file line numberDiff line numberDiff line change
@@ -1,156 +1,5 @@
1-
const std = @import("std");
2-
const Allocator = std.mem.Allocator;
3-
const testing = std.testing;
1+
pub const boyer_moore = @import("boyer_moore.zig");
42

5-
/// Returns the maximum length of suffixes of both strings
6-
fn longestCommonSuffix(a: []const u8, b: []const u8) usize {
7-
var i: usize = 0;
8-
while (i < a.len and i < b.len) : (i += 1) {
9-
if (a[(a.len - 1) - i] != b[(b.len - 1) - i]) {
10-
break;
11-
}
12-
}
13-
return i;
14-
}
15-
16-
pub const StringFinder = struct {
17-
allocator: ?*Allocator,
18-
pattern: []const u8,
19-
bad_char: [possible_values]usize,
20-
good_suffix: []usize,
21-
22-
const possible_values = std.math.maxInt(u8) + 1;
23-
const Self = @This();
24-
25-
/// An empty pattern, requires no allocations
26-
pub const empty = Self{
27-
.allocator = null,
28-
.pattern = "",
29-
.bad_char = [_]usize{0} ** possible_values,
30-
.good_suffix = &[_]usize{},
31-
};
32-
33-
/// Initializes a StringFinder with a pattern
34-
pub fn init(allocator: *Allocator, pattern: []const u8) !Self {
35-
if (pattern.len == 0) return Self.empty;
36-
37-
var self = Self{
38-
.allocator = allocator,
39-
.pattern = pattern,
40-
.bad_char = undefined,
41-
.good_suffix = try allocator.alloc(usize, pattern.len),
42-
};
43-
44-
const last = pattern.len - 1;
45-
46-
// Initialize bad character rule table
47-
for (self.bad_char) |*x| x.* = pattern.len;
48-
49-
for (pattern) |x, i| {
50-
if (i == last) break;
51-
self.bad_char[x] = last - i;
52-
}
53-
54-
// Build good suffix rule table
55-
var last_prefix = last;
56-
57-
// First pass
58-
{
59-
var i = last + 1;
60-
while (i > 0) : (i -= 1) {
61-
if (std.mem.startsWith(u8, pattern, pattern[i..])) {
62-
last_prefix = i;
63-
}
64-
self.good_suffix[i - 1] = last_prefix + last - (i - 1);
65-
}
66-
}
67-
68-
// Second pass
69-
{
70-
var i: usize = 0;
71-
while (i < last) : (i += 1) {
72-
const len_suffix = longestCommonSuffix(pattern, pattern[1 .. i + 1]);
73-
if (pattern[i - len_suffix] != pattern[last - len_suffix]) {
74-
self.good_suffix[last - len_suffix] = len_suffix + last - i;
75-
}
76-
}
77-
}
78-
79-
return self;
80-
}
81-
82-
/// Frees all memory allocated by this string searcher
83-
pub fn deinit(self: *Self) void {
84-
if (self.allocator) |allocator| {
85-
allocator.free(self.good_suffix);
86-
}
87-
}
88-
89-
/// Returns the index of the first occurrence of the pattern in the
90-
/// text. Returns null if the pattern wasn't found
91-
pub fn next(self: Self, text: []const u8) ?usize {
92-
var i: usize = self.pattern.len;
93-
while (i <= text.len) {
94-
// Try to match starting from the end of the pattern
95-
var j: usize = self.pattern.len;
96-
while (j > 0 and text[i - 1] == self.pattern[j - 1]) {
97-
i -= 1;
98-
j -= 1;
99-
}
100-
101-
// If we matched until the beginning of the pattern, we
102-
// have a match
103-
if (j == 0) {
104-
return i;
105-
}
106-
107-
// Use the bad character table and the good suffix table
108-
// to advance our position
109-
i += std.math.max(
110-
self.bad_char[text[i - 1]],
111-
self.good_suffix[j - 1],
112-
);
113-
}
114-
115-
return null;
116-
}
117-
};
118-
119-
test "empty pattern" {
120-
const allocator = testing.allocator;
121-
122-
var sf = try StringFinder.init(allocator, "");
123-
defer sf.deinit();
124-
125-
testing.expectEqual(@as(?usize, 0), sf.next("zig"));
126-
testing.expectEqual(@as(?usize, 0), sf.next(""));
127-
testing.expectEqual(@as(?usize, 0), sf.next("a"));
128-
testing.expectEqual(@as(?usize, 0), sf.next("lang"));
129-
}
130-
131-
test "pattern with length 1" {
132-
const allocator = testing.allocator;
133-
134-
var sf = try StringFinder.init(allocator, "a");
135-
defer sf.deinit();
136-
137-
testing.expectEqual(@as(?usize, null), sf.next("zig"));
138-
testing.expectEqual(@as(?usize, null), sf.next(""));
139-
testing.expectEqual(@as(?usize, 0), sf.next("a"));
140-
testing.expectEqual(@as(?usize, 1), sf.next("lang"));
141-
}
142-
143-
test "test matching" {
144-
const allocator = testing.allocator;
145-
146-
var sf = try StringFinder.init(allocator, "zig");
147-
defer sf.deinit();
148-
149-
testing.expectEqual(@as(?usize, 0), sf.next("zig"));
150-
testing.expectEqual(@as(?usize, 0), sf.next("ziglang"));
151-
testing.expectEqual(@as(?usize, 4), sf.next("langzig"));
152-
testing.expectEqual(@as(?usize, 4), sf.next("langziglang"));
153-
testing.expectEqual(@as(?usize, null), sf.next(""));
154-
testing.expectEqual(@as(?usize, null), sf.next("firefox"));
155-
testing.expectEqual(@as(?usize, 8), sf.next("abc abc ziglang"));
3+
test "" {
4+
_ = @import("boyer_moore.zig");
1565
}

0 commit comments

Comments
 (0)