Thursday, July 8, 2010

Solution to Code Golf: Word Frequency from StackOverflow (Hopefully!)

I just did this to kill some time. I'm not sure whether it's the correct solution to this problem:
http://stackoverflow.com/questions/3169051/code-golf-word-frequency-chart
but it seems to fulfill all the requirements :)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import java.io.*;
import java.util.*;
 
/**
 * Prints an ASCII bar chart of the most frequent words in a text file.
 *
 * <p>Words are maximal runs of the letters a-z after lower-casing; a small
 * set of common words is ignored. At most {@link #MAX_BARS} words are shown,
 * most frequent first, and bar lengths are scaled so that the most frequent
 * word's bar fills the width left over once the longest charted word has
 * been accounted for.
 */
public class WordFrequency {

    /** Maximum number of words shown in the chart. */
    private static final int MAX_BARS = 22;

    /** Column budget for one chart line. */
    private static final int LINE_WIDTH = 80;

    /** Columns taken on each bar line by the "|", "| " and trailing " ". */
    private static final int DECORATION_WIDTH = 4;

    /**
     * Words that are never counted. The empty string is included because
     * splitting a line that starts with a non-letter yields a leading empty token.
     */
    private static final Set<String> IGNORED_WORDS = new HashSet<String>(Arrays.asList(
            "the", "and", "of", "to", "a", "o", "i", "it", "in", "is", "or", ""));

    /**
     * Reads the file named by {@code args[0]} and prints its word-frequency
     * chart to standard output. Nothing is printed when the file contains no
     * countable words.
     *
     * @param args command-line arguments; {@code args[0]} is the path of the file to read
     * @throws IllegalArgumentException if no file path is given
     * @throws Exception if the file cannot be opened or read
     */
    public static void main(String[] args) throws Exception {
        if (args == null || args.length < 1) {
            throw new IllegalArgumentException("usage: java WordFrequency <file>");
        }

        Map<String, Integer> counts;
        BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(new File(args[0]))));
        try {
            counts = countWords(reader);
        } finally {
            reader.close();
        }

        List<String> top = mostFrequent(counts, MAX_BARS);
        if (top.isEmpty()) {
            return; // nothing to chart
        }
        System.out.print(renderChart(top, counts));
    }

    /** Counts every non-ignored word read from {@code reader}. */
    private static Map<String, Integer> countWords(BufferedReader reader) throws IOException {
        Map<String, Integer> counts = new HashMap<String, Integer>();
        String line;
        while ((line = reader.readLine()) != null) {
            // Locale.ROOT keeps the a-z mapping independent of the default locale.
            for (String word : line.toLowerCase(Locale.ROOT).split("[^a-z]+")) {
                if (IGNORED_WORDS.contains(word)) {
                    continue;
                }
                Integer seen = counts.get(word);
                counts.put(word, seen == null ? 1 : seen + 1);
            }
        }
        return counts;
    }

    /**
     * Returns up to {@code limit} words ordered by descending count; words
     * with equal counts are ordered alphabetically so the result is deterministic.
     */
    private static List<String> mostFrequent(final Map<String, Integer> counts, int limit) {
        List<String> words = new ArrayList<String>(counts.keySet());
        Collections.sort(words, new Comparator<String>() {
            @Override
            public int compare(String left, String right) {
                int byCount = counts.get(right).compareTo(counts.get(left));
                return byCount != 0 ? byCount : left.compareTo(right);
            }
        });
        // Clamp the cut-off: there may be fewer distinct words than the limit.
        return words.subList(0, Math.min(limit, words.size()));
    }

    /**
     * Renders two lines per word: a top rule and the bar followed by the word.
     * {@code words} must be non-empty and ordered most frequent first.
     */
    private static String renderChart(List<String> words, Map<String, Integer> counts) {
        int longest = 0;
        for (String word : words) {
            longest = Math.max(longest, word.length());
        }
        // Keep at least one column so a very long word cannot erase every bar.
        int maxWidth = Math.max(1, LINE_WIDTH - DECORATION_WIDTH - longest);
        long topCount = counts.get(words.get(0));
        String eol = System.getProperty("line.separator");

        StringBuilder chart = new StringBuilder();
        for (String word : words) {
            // Exact integer ceiling of count * maxWidth / topCount; avoids
            // floating-point rounding adding a spurious extra column.
            int width = (int) ((counts.get(word) * (long) maxWidth + topCount - 1) / topCount);
            StringBuilder bar = new StringBuilder(width);
            for (int n = 0; n < width; n++) {
                bar.append('_');
            }
            chart.append(' ').append(bar).append(eol);
            chart.append('|').append(bar).append("| ").append(word).append(' ').append(eol);
        }
        return chart.toString();
    }
}

Running 'cat WordFrequency.java | wc -m' gives me 2794 characters :p Anyway, I didn't bother to obfuscate my solution to make it shorter. It was all for fun. Enjoy! :)

No comments:

Post a Comment