|
1 | | -const std = @import("std"); |
2 | | -const Allocator = std.mem.Allocator; |
3 | | -const testing = std.testing; |
| 1 | +pub const boyer_moore = @import("boyer_moore.zig"); |
4 | 2 |
|
5 | | -/// Returns the length of the longest common suffix of both strings |
6 | | -fn longestCommonSuffix(a: []const u8, b: []const u8) usize { |
7 | | - var i: usize = 0; |
8 | | - while (i < a.len and i < b.len) : (i += 1) { |
9 | | - if (a[(a.len - 1) - i] != b[(b.len - 1) - i]) { |
10 | | - break; |
11 | | - } |
12 | | - } |
13 | | - return i; |
14 | | -} |
15 | | - |
16 | | -pub const StringFinder = struct { |
17 | | - allocator: ?*Allocator, |
18 | | - pattern: []const u8, |
19 | | - bad_char: [possible_values]usize, |
20 | | - good_suffix: []usize, |
21 | | - |
22 | | - const possible_values = std.math.maxInt(u8) + 1; |
23 | | - const Self = @This(); |
24 | | - |
25 | | - /// An empty pattern, requires no allocations |
26 | | - pub const empty = Self{ |
27 | | - .allocator = null, |
28 | | - .pattern = "", |
29 | | - .bad_char = [_]usize{0} ** possible_values, |
30 | | - .good_suffix = &[_]usize{}, |
31 | | - }; |
32 | | - |
33 | | - /// Initializes a StringFinder with a pattern |
34 | | - pub fn init(allocator: *Allocator, pattern: []const u8) !Self { |
35 | | - if (pattern.len == 0) return Self.empty; |
36 | | - |
37 | | - var self = Self{ |
38 | | - .allocator = allocator, |
39 | | - .pattern = pattern, |
40 | | - .bad_char = undefined, |
41 | | - .good_suffix = try allocator.alloc(usize, pattern.len), |
42 | | - }; |
43 | | - |
44 | | - const last = pattern.len - 1; |
45 | | - |
46 | | - // Initialize bad character rule table |
47 | | - for (self.bad_char) |*x| x.* = pattern.len; |
48 | | - |
49 | | - for (pattern) |x, i| { |
50 | | - if (i == last) break; |
51 | | - self.bad_char[x] = last - i; |
52 | | - } |
53 | | - |
54 | | - // Build good suffix rule table |
55 | | - var last_prefix = last; |
56 | | - |
57 | | - // First pass |
58 | | - { |
59 | | - var i = last + 1; |
60 | | - while (i > 0) : (i -= 1) { |
61 | | - if (std.mem.startsWith(u8, pattern, pattern[i..])) { |
62 | | - last_prefix = i; |
63 | | - } |
64 | | - self.good_suffix[i - 1] = last_prefix + last - (i - 1); |
65 | | - } |
66 | | - } |
67 | | - |
68 | | - // Second pass |
69 | | - { |
70 | | - var i: usize = 0; |
71 | | - while (i < last) : (i += 1) { |
72 | | - const len_suffix = longestCommonSuffix(pattern, pattern[1 .. i + 1]); |
73 | | - if (pattern[i - len_suffix] != pattern[last - len_suffix]) { |
74 | | - self.good_suffix[last - len_suffix] = len_suffix + last - i; |
75 | | - } |
76 | | - } |
77 | | - } |
78 | | - |
79 | | - return self; |
80 | | - } |
81 | | - |
82 | | - /// Frees all memory allocated by this string searcher |
83 | | - pub fn deinit(self: *Self) void { |
84 | | - if (self.allocator) |allocator| { |
85 | | - allocator.free(self.good_suffix); |
86 | | - } |
87 | | - } |
88 | | - |
89 | | - /// Returns the index of the first occurrence of the pattern in the |
90 | | - /// text. Returns null if the pattern wasn't found |
91 | | - pub fn next(self: Self, text: []const u8) ?usize { |
92 | | - var i: usize = self.pattern.len; |
93 | | - while (i <= text.len) { |
94 | | - // Try to match starting from the end of the pattern |
95 | | - var j: usize = self.pattern.len; |
96 | | - while (j > 0 and text[i - 1] == self.pattern[j - 1]) { |
97 | | - i -= 1; |
98 | | - j -= 1; |
99 | | - } |
100 | | - |
101 | | - // If we matched until the beginning of the pattern, we |
102 | | - // have a match |
103 | | - if (j == 0) { |
104 | | - return i; |
105 | | - } |
106 | | - |
107 | | - // Use the bad character table and the good suffix table |
108 | | - // to advance our position |
109 | | - i += std.math.max( |
110 | | - self.bad_char[text[i - 1]], |
111 | | - self.good_suffix[j - 1], |
112 | | - ); |
113 | | - } |
114 | | - |
115 | | - return null; |
116 | | - } |
117 | | -}; |
118 | | - |
119 | | -test "empty pattern" { |
120 | | - const allocator = testing.allocator; |
121 | | - |
122 | | - var sf = try StringFinder.init(allocator, ""); |
123 | | - defer sf.deinit(); |
124 | | - |
125 | | - testing.expectEqual(@as(?usize, 0), sf.next("zig")); |
126 | | - testing.expectEqual(@as(?usize, 0), sf.next("")); |
127 | | - testing.expectEqual(@as(?usize, 0), sf.next("a")); |
128 | | - testing.expectEqual(@as(?usize, 0), sf.next("lang")); |
129 | | -} |
130 | | - |
131 | | -test "pattern with length 1" { |
132 | | - const allocator = testing.allocator; |
133 | | - |
134 | | - var sf = try StringFinder.init(allocator, "a"); |
135 | | - defer sf.deinit(); |
136 | | - |
137 | | - testing.expectEqual(@as(?usize, null), sf.next("zig")); |
138 | | - testing.expectEqual(@as(?usize, null), sf.next("")); |
139 | | - testing.expectEqual(@as(?usize, 0), sf.next("a")); |
140 | | - testing.expectEqual(@as(?usize, 1), sf.next("lang")); |
141 | | -} |
142 | | - |
143 | | -test "test matching" { |
144 | | - const allocator = testing.allocator; |
145 | | - |
146 | | - var sf = try StringFinder.init(allocator, "zig"); |
147 | | - defer sf.deinit(); |
148 | | - |
149 | | - testing.expectEqual(@as(?usize, 0), sf.next("zig")); |
150 | | - testing.expectEqual(@as(?usize, 0), sf.next("ziglang")); |
151 | | - testing.expectEqual(@as(?usize, 4), sf.next("langzig")); |
152 | | - testing.expectEqual(@as(?usize, 4), sf.next("langziglang")); |
153 | | - testing.expectEqual(@as(?usize, null), sf.next("")); |
154 | | - testing.expectEqual(@as(?usize, null), sf.next("firefox")); |
155 | | - testing.expectEqual(@as(?usize, 8), sf.next("abc abc ziglang")); |
| 3 | +test "" { |
| 4 | + _ = @import("boyer_moore.zig"); |
156 | 5 | } |
0 commit comments