简体   繁体   中英

Java word frequency ordered by length then alphabetically

    print(frequency); // instead of print(list);
    }
    /**
     * Sorts the given list in place, shortest words first; words of equal
     * length are ordered by {@link String#compareTo(String)}.
     *
     * @param list the words to sort; modified in place
     * @return the same list instance, now sorted
     */
    public static List<String> sort(List<String> list){
        Comparator<String> byLengthThenAlphabet = new Comparator<String>() {
            @Override
            public int compare(String o1, String o2) {
                // Length decides first; only ties fall through to the natural String order.
                int lengthOrder = Integer.compare(o1.length(), o2.length());
                if (lengthOrder != 0) {
                    return lengthOrder;
                }
                return o1.compareTo(o2);
            }
        };
        Collections.sort(list, byLengthThenAlphabet);
        return list;
    }
    /** Prints each word's length, the word, and its frequency, in the iteration order of the given map. **/
    public static void print(Map<String, Integer> frequency) {
        // One line per entry: "<length> - <word>: <frequency>", in the map's iteration order.
        // %s is the string conversion; the previous %w is not a java.util.Formatter
        // conversion and made printf throw UnknownFormatConversionException.
        frequency.forEach((word, freq) -> System.out.printf("%d - %s: %d%n", word.length(), word, freq));
    }
        }
    }
    /** Counts how many words **/
    public static void count(SimpleCharacterReader stream) {
        // Reads the stream to its end, bumping the static counter once per
        // space, newline or tab; EOFException is the loop's only exit.
        try {
            for (;;) {
                a = getReader(stream);
                switch (a) {
                    case ' ':
                    case '\n':
                    case '\t':
                        count++;
                        break;
                    default:
                        break;
                }
            }
        } catch (EOFException eof) {
            stream.Dispose();
        }
        // NOTE(review): a final word with no trailing whitespace is not counted here.
        arr = new String[count];
    }
    /** Gets the characters **/
    public static char getReader(ICharacterReader reader) throws EOFException {
        // Thin pass-through: an EOFException raised by the reader propagates to the caller.
        final char next = reader.GetNextChar();
        return next;
    }
    /** Store unsorted words in array **/
    public static void store(ICharacterReader info) throws EOFException {
        // Fills arr one slot per delimiter until `count` slots are written.
        // Letters and digits extend the current word; any other non-delimiter
        // character (punctuation) is dropped.
        int filled = 0;
        while (filled < count) {
            s = info.GetNextChar();
            if (Character.isLetterOrDigit(s)) {
                word = word + s;
                continue;
            }
            boolean delimiter = s == ' ' || s == '\n' || s == '\t';
            if (delimiter) {
                arr[filled] = word;
                filled++;
                word = "";
            }
        }
    }
}

I have been tasked with creating a character reader that reads from a project file and creates a word frequency chart ordered by length and then alphabetically if they are the same length. This is the code I have so far, but my output contains repeated words and no frequencies. What is wrong? I am not sure where I have gone wrong. SimpleCharacterReader and ICharacterReader are the files I was given. Thanks.

My code is the last one

import java.io.EOFException;
import java.util.Random;

/**
 * In-memory {@link ICharacterReader} that serves a fixed text one character
 * at a time and signals the end of the text with {@link EOFException}.
 */
public class SimpleCharacterReader implements ICharacterReader {
    /** Index of the next character to hand out. */
    private int m_Pos = 0;

    /** Line separator placed between the lines of the text. */
    public static final char lf = '\n';

    /** The full text served by this reader; lines are joined with {@link #lf}. */
    private String m_Content =
        "It was the best of times, it was the worst of times," + lf
        + "it was the age of wisdom, it was the age of foolishness," + lf
        + "it was the epoch of belief, it was the epoch of incredulity," + lf
        + "it was the season of Light, it was the season of Darkness," + lf
        + "it was the spring of hope, it was the winter of despair," + lf
        + "we had everything before us, we had nothing before us," + lf
        + "we were all going direct to Heaven, we were all going direct" + lf
        + "the other way--in short, the period was so far like the present" + lf
        + "period, that some of its noisiest authorities insisted on its" + lf
        + "being received, for good or for evil, in the superlative degree" + lf
        + "of comparison only." + lf
        + "There were a king with a large jaw and a queen with a plain face," + lf
        + "on the throne of England; there were a king with a large jaw and" + lf
        + "a queen with a fair face, on the throne of France.  In both" + lf
        + "countries it was clearer than crystal to the lords of the State" + lf
        + "preserves of loaves and fishes, that things in general were" + lf
        + "settled for ever";

    /** Not used by any method of this class. */
    Random m_Rnd = new Random();

    /**
     * Returns the character at the current position and advances by one.
     *
     * @return the next character of the text
     * @throws EOFException once every character has been returned
     */
    public char GetNextChar() throws EOFException {
        if (m_Pos < m_Content.length()) {
            char next = m_Content.charAt(m_Pos);
            m_Pos++;
            return next;
        }
        throw new EOFException();
    }

    /** Releases nothing; this reader holds no external resources. */
    public void Dispose() {
        // Do nothing
    }
}

import java.io.EOFException;

/**
 * A source of characters that is read one character at a time.
 */
public interface ICharacterReader {

    /**
     * Returns the next character from the source.
     *
     * @return the next character
     * @throws EOFException declared for end of input; SimpleCharacterReader
     *         throws it once its content is exhausted
     */
    char GetNextChar() throws EOFException;

    /**
     * Called when the caller is finished with the reader.
     * SimpleCharacterReader's implementation does nothing.
     */
    void Dispose();
}

import java.lang.Character;
import java.io.EOFException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;

/**
 * Reads words from a {@link SimpleCharacterReader} and prints a word-frequency
 * chart ordered by word length, then alphabetically.
 *
 * <p>Words are sequences of letters and digits separated by a space, newline
 * or tab; other characters (punctuation) are dropped. Comparison is
 * case-sensitive, so "It" and "it" are counted as different words.
 */
public class Analyse {
    /** Words in the order they were read; allocated by {@link #count} and filled by {@link #store}. */
    static String arr[];
    /** Number of word slots found by the most recent {@link #count} call. */
    public static int count = 0;
    /** Scratch characters: {@code a} is used while counting, {@code s} while storing. */
    static char a, s;
    /** Word currently being assembled by {@link #store}. */
    static String word = "";

    /**
     * Counts the words, stores them, sorts them and prints one line per
     * distinct word with its frequency.
     *
     * @param args unused
     * @throws EOFException declared because {@link #store} declares it
     */
    public static void main(String[] args) throws EOFException {
        /** Need to know how many words we have **/
        SimpleCharacterReader stream = new SimpleCharacterReader();
        count(stream);

        SimpleCharacterReader st = new SimpleCharacterReader();
        try {
            store(st);
        } finally {
            st.Dispose();
        }

        /** Keep only real words: consecutive delimiters leave empty slots behind **/
        List<String> list = new ArrayList<>();
        for (String candidate : arr) {
            if (candidate != null && !candidate.isEmpty()) {
                list.add(candidate);
            }
        }

        sort(list);
        printFrequencies(list);
    }

    /**
     * Sorts the list in place by length, then by {@link String#compareTo(String)}.
     *
     * @param list words to sort; modified in place
     * @return the same list instance, now sorted
     */
    public static List<String> sort(List<String> list) {
        Collections.sort(list, new Comparator<String>() {
            @Override
            public int compare(String o1, String o2) {
                int byLength = Integer.compare(o1.length(), o2.length());
                return byLength != 0 ? byLength : o1.compareTo(o2);
            }
        });
        return list;
    }

    /**
     * Prints every entry of the list as "length - word", one per line.
     *
     * @param list the words to print
     */
    public static void print(List<String> list) {
        // Iterate the list itself rather than arr: the two need not have the same size.
        for (String entry : list) {
            System.out.println(entry.length() + " - " + entry);
        }
    }

    /**
     * Prints "length - word: frequency" once per distinct word.
     * Relies on the list being sorted so that equal words are adjacent.
     */
    private static void printFrequencies(List<String> sorted) {
        int start = 0;
        while (start < sorted.size()) {
            String current = sorted.get(start);
            int end = start;
            while (end < sorted.size() && sorted.get(end).equals(current)) {
                end++;
            }
            System.out.println(current.length() + " - " + current + ": " + (end - start));
            start = end;
        }
    }

    /**
     * Counts the word slots in the stream and allocates {@link #arr} to match.
     * Each delimiter closes one slot; text after the last delimiter forms one more.
     * The stream is consumed to its end and then disposed.
     *
     * @param stream the reader to scan
     */
    public static void count(SimpleCharacterReader stream) {
        count = 0; // start fresh so repeated calls do not accumulate
        boolean trailingText = false;
        try {
            while (true) {
                a = getReader(stream);
                if (isDelimiter(a)) {
                    count++;
                    trailingText = false;
                } else {
                    trailingText = true;
                }
            }
        } catch (EOFException eof) {
            // End of input is reported by exception in this reader API.
            stream.Dispose();
        }
        if (trailingText) {
            count++; // the last word has no delimiter after it
        }
        arr = new String[count];
    }

    /**
     * Reads one character from the reader.
     *
     * @param reader the source to read from
     * @return the next character
     * @throws EOFException if the reader has no more characters
     */
    public static char getReader(ICharacterReader reader) throws EOFException {
        return reader.GetNextChar();
    }

    /**
     * Fills {@link #arr} with the words read from the reader, in reading order.
     * Stops once {@link #count} slots are filled or the input ends; a word
     * still pending at end of input is stored in the next free slot.
     *
     * @param info the source to read from
     * @throws EOFException kept in the signature for existing callers; end of
     *         input is handled internally
     */
    public static void store(ICharacterReader info) throws EOFException {
        int i = 0;
        word = "";
        while (i < count) {
            try {
                s = info.GetNextChar();
            } catch (EOFException eof) {
                // Input ended before a closing delimiter: flush the pending word.
                arr[i] = word;
                word = "";
                break;
            }
            if (Character.isLetterOrDigit(s)) {
                word += Character.toString(s);
            } else if (isDelimiter(s)) {
                arr[i++] = word;
                word = "";
            }
        }
    }

    /** Returns true for the characters that end a word: space, newline, tab. */
    private static boolean isDelimiter(char c) {
        return c == ' ' || c == '\n' || c == '\t';
    }
}

You need to collect the words into a TreeMap which supports ordered keys and may be provided with a custom comparator via constructor: public TreeMap(Comparator<? super K> comparator) .

The frequency of each word will be stored as a value. It can be accumulated using Map::merge function.

Assuming that all the words are read into array arr by method store , the code may be updated as follows:

// class Analyse, method main
store(st);
// Order keys by length first; equal lengths fall back to natural String order.
Comparator<String> byLengthThenAlphabet = (left, right) -> {
    int byLength = Integer.compare(left.length(), right.length());
    return byLength != 0 ? byLength : left.compareTo(right);
};
// The TreeMap keeps its keys sorted by the comparator; values hold the word counts.
Map<String, Integer> frequency = new TreeMap<>(byLengthThenAlphabet);

for (String word : arr) {
    if (word == null || " ".equals(word)) {
        continue; // skip slots that hold no word
    }
    frequency.merge(word, 1, Integer::sum);
}

print(frequency); // instead of print(list);

Next, method print needs to be updated to process a map instead of the list:

/**
 * Prints one line per map entry as "length - word: frequency",
 * in the map's iteration order.
 *
 * @param frequency the words mapped to their counts
 */
public static void print(Map<String, Integer> frequency) {
    // %s is the string conversion; the previous %w is not a java.util.Formatter
    // conversion and made printf throw UnknownFormatConversionException.
    frequency.forEach((word, freq) -> System.out.printf("%d - %s: %d%n", word.length(), word, freq));
}

Also, it is possible to use the Stream API to sort the words with a custom comparator built as the chain Comparator.comparingInt(String::length).thenComparing(String::compareTo), and then collect the frequencies with Collectors.toMap into a LinkedHashMap, which maintains insertion order:

// class Analyse, method main
store(st);

// Shortest words first; ties are broken by natural String order.
Comparator<String> byLengthThenAlphabet = Comparator
    .comparingInt(String::length)
    .thenComparing(String::compareTo); // or compareToIgnoreCase if needed

Map<String, Integer> frequency = Arrays
    .stream(arr)                          // Stream<String>
    .filter(word -> word != null)         // drop unfilled slots
    .filter(word -> !" ".equals(word))
    .sorted(byLengthThenAlphabet)
    .collect(Collectors.toMap(
        word -> word,                            // the word is the key
        word -> 1,                               // every occurrence starts at 1
        (total, increment) -> total + increment, // repeated keys add up to the frequency
        LinkedHashMap::new                       // keeps the sorted encounter order
    ));

print(frequency); // instead of print(list);

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM