Concordance.java


Below is the syntax highlighted version of Concordance.java from §3.5 Searching Applications.


/******************************************************************************
 *  Compilation:  javac Concordance.java
 *  Execution:    java Concordance corpus.txt  (queries are read from standard input)
 *  Dependencies: ST.java Queue.java In.java StdIn.java StdOut.java
 *
 *  % java Concordance tale.txt
 *  cities
 *  tongues of the two *cities* that were blended in
 *
 *  majesty
 *  their turnkeys and the *majesty* of the law fired
 *  me treason against the *majesty* of the people in
 *  of his most gracious *majesty* king george the third
 *
 *  princeton
 *
 *  Build a concordance for a text corpus.
 *
 ******************************************************************************/


/**
 * Interactive concordance client. It records every position at which each
 * word of a corpus occurs, then, for each query word read from standard
 * input, prints every occurrence of that word together with the words
 * immediately around it.
 */
public class Concordance {

    // Size of the window around a match: up to CONTEXT - 1 words are printed
    // before the matched word and up to CONTEXT - 1 words after it.
    private static final int CONTEXT = 5;

    /**
     * Builds the concordance for the corpus named by the first command-line
     * argument and then answers queries read from standard input until it
     * is empty. Each query is followed by one blank line; a query that does
     * not occur in the corpus produces only that blank line.
     *
     * @param args the command-line arguments; {@code args[0]} names the
     *             corpus and is passed to the {@code In} constructor
     * @throws IllegalArgumentException if no corpus argument is supplied
     */
    public static void main(String[] args) {
        // Fail with a usage hint instead of an ArrayIndexOutOfBoundsException.
        if (args == null || args.length == 0) {
            throw new IllegalArgumentException(
                "usage: java Concordance <corpus>  (queries are read from standard input)");
        }

        In in = new In(args[0]);
        String[] words = in.readAllStrings();
        ST<String, Queue<Integer>> st = buildIndex(words);
        StdOut.println("Finished building concordance");

        // process queries
        while (!StdIn.isEmpty()) {
            String query = StdIn.readString();
            Queue<Integer> positions = st.get(query);
            // get() yields null for a word that never occurs: nothing to print.
            if (positions != null) {
                for (int k : positions) {
                    printOccurrence(words, k);
                }
            }
            StdOut.println();
        }
    }

    // Maps each distinct word to the queue of indices (in increasing order)
    // at which it appears in words.
    private static ST<String, Queue<Integer>> buildIndex(String[] words) {
        ST<String, Queue<Integer>> st = new ST<String, Queue<Integer>>();
        for (int i = 0; i < words.length; i++) {
            String word = words[i];
            // A single lookup serves both the "seen before?" test and the fetch.
            Queue<Integer> positions = st.get(word);
            if (positions == null) {
                positions = new Queue<Integer>();
                st.put(word, positions);
            }
            positions.enqueue(i);
        }
        return st;
    }

    // Prints one line for the occurrence at index k: the preceding context,
    // the matched word wrapped in asterisks, then the following context.
    // The window is clipped at both ends of the corpus.
    private static void printOccurrence(String[] words, int k) {
        int lo = Math.max(0, k - CONTEXT + 1);
        int hi = Math.min(k + CONTEXT, words.length);

        for (int i = lo; i < k; i++) {
            StdOut.print(words[i] + " ");
        }
        StdOut.print("*" + words[k] + "* ");
        for (int i = k + 1; i < hi; i++) {
            StdOut.print(words[i] + " ");
        }
        StdOut.println();
    }
}


Copyright © 2000–2024, Robert Sedgewick and Kevin Wayne.
Last updated: Wed Mar 27 09:28:47 AM EDT 2024.