/******************************************************************************
* Compilation: javac Manber.java
* Execution: java Manber < text.txt
* Dependencies: StdIn.java
*
* Reads a text corpus from stdin and sorts the suffixes
* in subquadratic time using a variant of Manber's algorithm.
*
******************************************************************************/
public class Manber {
private int n; // length of input string
private String text; // input text
private int[] index; // offset of ith string in order
private int[] rank; // rank of ith string
private int[] newrank; // rank of ith string (temporary)
private int offset;
public Manber(String s) {
n = s.length();
text = s;
index = new int[n+1];
rank = new int[n+1];
newrank = new int[n+1];
// sentinels
index[n] = n;
rank[n] = -1;
msd();
doit();
}
/**
* Returns the length of the input string.
* @return the length of the input string
*/
public int length() {
return n;
}
/**
* Returns the index into the original string of the ith smallest suffix.
* That is, {@code text.substring(sa.index(i), n)}
* is the ith smallest suffix.
* @param i an integer between 0 and n-1
* @return the index into the original string of the ith smallest suffix
* @throws java.lang.IllegalArgumentException unless 0 <= i < n
*/
public int index(int i) {
if (i < 0 || i >= n) throw new IllegalArgumentException();
return index[i];
}
/**
* Returns the ith smallest suffix as a string.
* @param i the index
* @return the i smallest suffix as a string
* @throws java.lang.IllegalArgumentException unless 0 <= i < n
*/
public String select(int i) {
if (i < 0 || i >= n) throw new IllegalArgumentException();
return text.substring(index[i]);
}
// do one pass of msd sorting by rank at given offset
private void doit() {
for (offset = 1; offset < n; offset += offset) {
int count = 0;
for (int i = 1; i <= n; i++) {
if (rank[index[i]] == rank[index[i-1]]) count++;
else if (count > 0) {
// sort
int left = i-1-count;
int right = i-1;
quicksort(left, right);
// now fix up ranks
int r = rank[index[left]];
for (int j = left + 1; j <= right; j++) {
if (less(index[j-1], index[j])) {
r = rank[index[left]] + j - left;
}
newrank[index[j]] = r;
}
// copy back - note can't update rank too eagerly
for (int j = left + 1; j <= right; j++) {
rank[index[j]] = newrank[index[j]];
}
count = 0;
}
}
}
}
// sort by leading char, assumes extended ASCII (256 values)
private void msd() {
final int R = 256;
// calculate frequencies
int[] freq = new int[R];
for (int i = 0; i < n; i++)
freq[text.charAt(i)]++;
// calculate cumulative frequencies
int[] cumm = new int[R];
for (int i = 1; i < R; i++)
cumm[i] = cumm[i-1] + freq[i-1];
// compute ranks
for (int i = 0; i < n; i++)
rank[i] = cumm[text.charAt(i)];
// sort by first char
for (int i = 0; i < n; i++)
index[cumm[text.charAt(i)]++] = i;
}
/******************************************************************************
* Helper functions for comparing suffixes.
******************************************************************************/
/**********************************************************************
* Is the substring text[v..n] lexicographically less than the
* substring text[w..n] ?
**********************************************************************/
private boolean less(int v, int w) {
return rank[v + offset] < rank[w + offset];
}
/******************************************************************************
* Quicksort code from Sedgewick 7.1, 7.2.
******************************************************************************/
// swap pointer sort indices
private void exch(int i, int j) {
int swap = index[i];
index[i] = index[j];
index[j] = swap;
}
// SUGGEST REPLACING WITH 3-WAY QUICKSORT SINCE ELEMENTS ARE
// RANKS AND THERE MAY BE DUPLICATES
private void quicksort(int lo, int hi) {
if (hi <= lo) return;
int i = partition(lo, hi);
quicksort(lo, i-1);
quicksort(i+1, hi);
}
private int partition(int lo, int hi) {
int i = lo-1, j = hi;
int v = index[hi];
while (true) {
// find item on left to swap
while (less(index[++i], v))
if (i == hi) break; // redundant
// find item on right to swap
while (less(v, index[--j]))
if (j == lo) break;
// check if pointers cross
if (i >= j) break;
exch(i, j);
}
// swap with partition element
exch(i, hi);
return i;
}
/**
* Unit tests the {@code Manber} data type.
*
* @param args the command-line arguments
*/
public static void main(String[] args) {
String s = StdIn.readAll().replaceAll("\\s+", " ").trim();
Manber suffix = new Manber(s);
StdOut.println(" i ind select");
StdOut.println("------------------------");
for (int i = 0; i < s.length(); i++) {
int index = suffix.index(i);
String ith = "\"" + s.substring(index, Math.min(index + 50, s.length())) + "\"";
StdOut.printf("%4d %4d %s\n", i, index, ith);
}
}
}