-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathsuffix_array_parallel.h
More file actions
198 lines (176 loc) · 9.1 KB
/
suffix_array_parallel.h
File metadata and controls
198 lines (176 loc) · 9.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
#ifndef SUFFIX_ARRAY_PARALLEL_H_
#define SUFFIX_ARRAY_PARALLEL_H_
//
// https://github.com/cmuparlay/parlaylib/blob/master/examples/suffix_array.h
//
#include <cmath>
#include "parlay/parallel.h"
#include "parlay/primitives.h"
#include "parlay/sequence.h"
#include "parlay/internal/get_time.h"
#include "longest_common_prefix.h"
// **************************************************************
// Suffix Array.
// Input must be a sequence of unsigned integer type. Uses a modified
// and optimized version of the algorithm from:
// Apostolico, Iliopoulos, Landau, Schieber, and Vishkin.
// Optimal parallel suffix tree construction.
// STOC '94.
// The work is O(n log n) work in the worst case, but for most inputs
// it does O(n) work beyond a sort on constant length integer keys.
// The depth is O(log^2 n) assuming the sort is within that bound.
// Each round doubles the substring lengths that have been sorted but
// drops any strings that are already sorted (i.e. have no equal
// strings for the current length). For most inputs, most strings
// (suffixes) drop out early.
// This code only works on up to 2^32 - 12 characters,
// Chars treated as unsigned, and 0 (null character) not allowed.
// **************************************************************
template <typename char_range>
auto suffix_array(const char_range& S) {
using index = unsigned int;
using ulong = unsigned long;
struct seg { index start; index end; };
index n = S.size();
int granularity = 100; // only effects performance, and not by much
// pack 12 chars starting at each index, and index into two unsigned longs and sort.
auto s = [&] (index i) -> index {return (i < n) ? (unsigned char) S[i] + 1 : 0;};
auto Clx = parlay::delayed_tabulate(n, [&] (ulong i) {
ulong high = 0, low = 0;
for (int j=0; j < 8; j++) high = (high << 8) + s(i+j);
for (int j=0; j < 4; j++) low = (low << 8) + s(8+i+j);
return std::pair{high, (low << 32) + i};});
auto Cl = parlay::sort(Clx);
// Unpack sorted pairs placing index in sorted, and marking where
// values changed in flags. Now correctly sorted up to 12 characters.
auto sorted = parlay::sequence<index>::uninitialized(n);
auto ranks = parlay::sequence<index>::uninitialized(n);
auto flags = parlay::sequence<bool>::uninitialized(n);
parlay::parallel_for(0, n, [&] (long j) {
auto [high,low] = Cl[j];
sorted[j] = low & ((1ul << 32) - 1);
flags[j] = ((j == 0) || (high != Cl[j-1].first) ||
(low >> 32) != (Cl[j-1].second >> 32));});
// Given flags indicating segment boundaries within a segment,
// splits into new segments. See below for definition of segments.
// Also updates the ranks
auto segs_from_flags = [&] (parlay::sequence<bool>& flags, index seg_start) {
auto offsets = parlay::pack_index(flags);
auto segs = parlay::tabulate(offsets.size(), [&] (ulong j) {
index start = seg_start + offsets[j];
index end = seg_start + ((j == offsets.size()-1) ? flags.size() : offsets[j+1]);
parlay::parallel_for(start, end, [&] (index i) {ranks[sorted[i]] = start;}, granularity);
return seg{start, end};}, granularity);
return filter(segs, [] (seg s) {return (s.end - s.start) > 1;});};
auto segments = segs_from_flags(flags, 0);
index offset = 12;
// This loop has the following invariants at the start of each iteration
// the suffixes are sorted up to the first "offset" characters
// "sorted" maintains the sorted indices of suffixes so far (eventually the result)
// "segs" maintains segments of contiguous regions of the sorted order that are not
// yet sorted (i.e. first offset characters are equal). Each is represented
// as a start and end index for the segment.
// "ranks" maintains the ranks of each suffix based on the sort so far. All ranks
// for suffixes in the same segment are the same (rank of first in segment).
// Each iteration doubles the size of offset, so will complete in at most log n rounds.
// In practice few segments survive for multiple rounds.
while (segments.size() > 0) {
// for suffixes in live segments grabs ranks from offset away and sorts within segment.
// mark in flags where the keys now differ.
auto flags = parlay::map(segments, [&] (seg segment) {
index s = segment.start;
index l = segment.end - segment.start;
auto p = parlay::tabulate(l, [&] (long i) {
index k = sorted[s + i];
return std::pair{(k + offset >= n) ? 0 : ranks[k + offset] + 1, k};}, granularity);
parlay::sort_inplace(p);
parlay::sequence<bool> flags(l);
parlay::parallel_for(0, l, [&] (long i) {
sorted[s + i] = p[i].second;
flags[i] = (i == 0) || p[i].first != p[i-1].first;}, granularity);
return flags;}, 1);
// break segments into smaller ones and throw away ones of length 1
segments = parlay::flatten(parlay::tabulate(segments.size(), [&] (long i) {
return segs_from_flags(flags[i], segments[i].start);}, 1));
offset = 2 * offset;
}
auto height = lcp(S, sorted);
auto lcp = parlay::sequence<index>::uninitialized(n);
lcp[0] = 0;
parlay::copy(height, parlay::make_slice(lcp.begin() + 1, lcp.end()));
return std::make_tuple(ranks, sorted, lcp);
}
template <typename char_range>
auto suffix_array_large_alphabet(const char_range& S) {
using index = unsigned int;
using ulong = unsigned long;
struct seg { index start; index end; };
index n = S.size();
int granularity = 100; // only effects performance, and not by much
// pack 3 chars starting at each index, and index into two unsigned longs and sort.
auto s = [&] (index i) -> index {return (i < n) ? (unsigned int) S[i] + 1 : 0;};
auto Clx = parlay::delayed_tabulate(n, [&] (ulong i) {
ulong high = (((ulong) s(i)) << 32) + s(i + 1);
ulong low = (((ulong) s(i + 2)) << 32) + i;
return std::pair{high, low};});
auto Cl = parlay::sort(Clx);
// Unpack sorted pairs placing index in sorted, and marking where
// values changed in flags. Now correctly sorted up to 12 characters.
auto sorted = parlay::sequence<index>::uninitialized(n);
auto ranks = parlay::sequence<index>::uninitialized(n);
auto flags = parlay::sequence<bool>::uninitialized(n);
parlay::parallel_for(0, n, [&] (long j) {
auto [high,low] = Cl[j];
sorted[j] = low & ((1ul << 32) - 1);
flags[j] = ((j == 0) || (high != Cl[j-1].first) ||
(low >> 32) != (Cl[j-1].second >> 32));});
// Given flags indicating segment boundaries within a segment,
// splits into new segments. See below for definition of segments.
// Also updates the ranks
auto segs_from_flags = [&] (parlay::sequence<bool>& flags, index seg_start) {
auto offsets = parlay::pack_index(flags);
auto segs = parlay::tabulate(offsets.size(), [&] (ulong j) {
index start = seg_start + offsets[j];
index end = seg_start + ((j == offsets.size()-1) ? flags.size() : offsets[j+1]);
parlay::parallel_for(start, end, [&] (index i) {ranks[sorted[i]] = start;}, granularity);
return seg{start, end};}, granularity);
return filter(segs, [] (seg s) {return (s.end - s.start) > 1;});};
auto segments = segs_from_flags(flags, 0);
index offset = 3;
// This loop has the following invariants at the start of each iteration
// the suffixes are sorted up to the first "offset" characters
// "sorted" maintains the sorted indices of suffixes so far (eventually the result)
// "segs" maintains segments of contiguous regions of the sorted order that are not
// yet sorted (i.e. first offset characters are equal). Each is represented
// as a start and end index for the segment.
// "ranks" maintains the ranks of each suffix based on the sort so far. All ranks
// for suffixes in the same segment are the same (rank of first in segment).
// Each iteration doubles the size of offset, so will complete in at most log n rounds.
// In practice few segments survive for multiple rounds.
while (segments.size() > 0) {
// for suffixes in live segments grabs ranks from offset away and sorts within segment.
// mark in flags where the keys now differ.
auto flags = parlay::map(segments, [&] (seg segment) {
index s = segment.start;
index l = segment.end - segment.start;
auto p = parlay::tabulate(l, [&] (long i) {
index k = sorted[s + i];
return std::pair{(k + offset >= n) ? 0 : ranks[k + offset] + 1, k};}, granularity);
parlay::sort_inplace(p);
parlay::sequence<bool> flags(l);
parlay::parallel_for(0, l, [&] (long i) {
sorted[s + i] = p[i].second;
flags[i] = (i == 0) || p[i].first != p[i-1].first;}, granularity);
return flags;}, 1);
// break segments into smaller ones and throw away ones of length 1
segments = parlay::flatten(parlay::tabulate(segments.size(), [&] (long i) {
return segs_from_flags(flags[i], segments[i].start);}, 1));
offset = 2 * offset;
}
auto height = lcp(S, sorted);
auto lcp = parlay::sequence<index>::uninitialized(n);
lcp[0] = 0;
parlay::copy(height, parlay::make_slice(lcp.begin() + 1, lcp.end()));
return std::make_tuple(ranks, sorted, lcp);
}
#endif // namespace SUFFIX_ARRAY_PARALLEL_H_