// WordTool.java
//
// Author: Rahul Simha
// September 2006
// Updated: 2017, 2019
//
// A general purpose tool to work with words. Needs
// two text files: words.txt (the Unix dictionary)
// and wordsWithPOSAndPron.txt (words with Parts-of-Speech), an edited
// version of the Moby project, using only words from the Unix
// dictionary, about 20,000 words.

import java.io.*;
import java.util.*;
import java.util.regex.*;

public class WordTool {

    // Word lists, appended to by parsePOSAndPron()/classifyWord(). The
    // trailing letter is the tag that selects the category in a word's
    // part-of-speech description.
    static ArrayList<String> allWords = new ArrayList<>();
    static ArrayList<String> nouns = new ArrayList<>();          // N
    static ArrayList<String> pluralNouns = new ArrayList<>();    // p
    static ArrayList<String> verbs = new ArrayList<>();          // V
    static ArrayList<String> transVerbs = new ArrayList<>();     // t
    static ArrayList<String> inTransVerbs = new ArrayList<>();   // i
    static ArrayList<String> adjectives = new ArrayList<>();     // A
    static ArrayList<String> adverbs = new ArrayList<>();        // v
    static ArrayList<String> conjunctives = new ArrayList<>();   // C
    static ArrayList<String> prepositions = new ArrayList<>();   // P
    static ArrayList<String> pronouns = new ArrayList<>();       // r

    // Given a word, produce its pronunciation (phoneme breakdown)
    static HashMap<String, String> pronunciations = new HashMap<>();

    static ArrayList<String> klingonWords = new ArrayList<>();

    // One shared generator instead of a fresh Random per call.
    private static final Random RAND = new Random();

    // Number of letters in the alphabet; used as the exclusive bound for
    // Random.nextInt so that every letter, including the last, can occur.
    private static final int ALPHABET_SIZE = 26;

    // Delimiter used by readWords(): any single non-word character.
    private static final Pattern NON_WORD = Pattern.compile("\\W");

    // Set once a parts-of-speech file has been read successfully, so that
    // a category that is legitimately empty does not trigger a second
    // read (which would duplicate the entries of every other list).
    private static boolean posFileLoaded = false;


    public static void main(String[] argv) {
        // For testing only: loads the scrabble word list.
        getScrabbleWordsUpperCase();
    }


    /////////////////////////////////////////////////////////////////////
    // Random Letters and strings

    /**
     * Returns a random lower-case letter in the range 'a'..'z' (inclusive).
     */
    public static char getRandomLetter() {
        return (char) ('a' + RAND.nextInt(ALPHABET_SIZE));
    }

    /**
     * Returns a random upper-case letter in the range 'A'..'Z' (inclusive).
     */
    public static char getRandomCapLetter() {
        return (char) ('A' + RAND.nextInt(ALPHABET_SIZE));
    }

    /**
     * Returns a string of the given length.
     *
     * NOTE(review): the loop body of this method was lost when the file
     * was extracted; filling with random lower-case letters is assumed
     * from the method name and the neighbouring helpers - confirm.
     *
     * @param length number of characters to generate
     * @return the generated string
     * @throws NegativeArraySizeException if length is negative
     */
    public static String getRandomString(int length) {
        return randomLowerCase(length);
    }

    /**
     * Returns a string whose length is chosen uniformly from
     * minLength..maxLength (inclusive).
     *
     * NOTE(review): the method name/signature and the final fill loop were
     * lost in extraction and are reconstructed from the surviving
     * validation and length computation - confirm.
     *
     * @param minLength smallest allowed length, must be at least 1
     * @param maxLength largest allowed length, must be at least minLength
     * @return the generated string, or null if the bounds are invalid
     */
    public static String getRandomString(int minLength, int maxLength) {
        if (minLength > maxLength) {
            return null;
        }
        if (minLength < 1) {
            return null;
        }
        int length = minLength + RAND.nextInt(maxLength - minLength + 1);
        return randomLowerCase(length);
    }

    // Builds a string of `length` random lower-case letters.
    private static String randomLowerCase(int length) {
        char[] letters = new char[length];
        for (int i = 0; i < length; i++) {
            letters[i] = getRandomLetter();
        }
        return new String(letters);
    }


    /////////////////////////////////////////////////////////////////////
    // All words

    /**
     * Returns all known words (see getAllWordsAsList()) as an array.
     *
     * NOTE(review): the method header was lost in extraction; the name is
     * taken from the surviving call in getDictionary().
     */
    public static String[] getAllWords() {
        return toStringArray(getAllWordsAsList());
    }

    /**
     * Same as getAllWords().
     */
    public static String[] getDictionary() {
        return getAllWords();
    }

    /**
     * Returns the sorted list of distinct words, reading the default
     * parts-of-speech file first if no words have been loaded yet.
     * The returned list is the class's own list, not a copy.
     */
    public static ArrayList<String> getAllWordsAsList() {
        ensureLoaded(allWords);
        // Remove duplicates because some words occur as both verb and
        // noun, e.g. "scrub". A TreeSet de-duplicates and sorts in one go.
        allWords = new ArrayList<>(new TreeSet<>(allWords));
        return allWords;
    }

    /**
     * Returns the scrubbed contents of words.txt as a list.
     */
    public static ArrayList<String> getUnixWordsAsList() {
        return new ArrayList<>(Arrays.asList(getUnixWords()));
    }

    /**
     * Returns the scrubbed contents of words.txt as an array.
     */
    public static String[] getUnixWords() {
        return getDictionary("words.txt");
    }


    /////////////////////////////////////////////////////////////////////
    // Parts of Speech and pronunciation

    /** Reads wordsWithPOSAndPron.txt into the word lists. */
    public static void readWordsPOSAndPron() {
        readWordsPOSAndPron("wordsWithPOSAndPron.txt");
    }

    /** Reads wordsWithPOSAndPronLarge.txt into the word lists. */
    public static void readWordsPOSAndPronLarge() {
        readWordsPOSAndPron("wordsWithPOSAndPronLarge.txt");
    }

    // Each category below offers three accessors: the backing list itself
    // (loaded on first use), an array copy, and a random element.
    // getRandomXxx() throws IllegalArgumentException if the category is
    // empty (see pickRandom).

    public static ArrayList<String> getNounsAsList() {
        return ensureLoaded(nouns);
    }

    public static String[] getNouns() {
        return toStringArray(getNounsAsList());
    }

    public static String getRandomNoun() {
        return pickRandom(getNounsAsList());
    }

    public static ArrayList<String> getPluralNounsAsList() {
        return ensureLoaded(pluralNouns);
    }

    public static String[] getPluralNouns() {
        return toStringArray(getPluralNounsAsList());
    }

    public static String getRandomPluralNoun() {
        return pickRandom(getPluralNounsAsList());
    }

    public static ArrayList<String> getVerbsAsList() {
        return ensureLoaded(verbs);
    }

    public static String[] getVerbs() {
        return toStringArray(getVerbsAsList());
    }

    public static String getRandomVerb() {
        return pickRandom(getVerbsAsList());
    }

    public static ArrayList<String> getTransVerbsAsList() {
        return ensureLoaded(transVerbs);
    }

    public static String[] getTransVerbs() {
        return toStringArray(getTransVerbsAsList());
    }

    public static String getRandomTransVerb() {
        return pickRandom(getTransVerbsAsList());
    }

    public static ArrayList<String> getInTransVerbsAsList() {
        return ensureLoaded(inTransVerbs);
    }

    public static String[] getInTransVerbs() {
        return toStringArray(getInTransVerbsAsList());
    }

    public static String getRandomInTransVerb() {
        return pickRandom(getInTransVerbsAsList());
    }

    public static ArrayList<String> getAdjectivesAsList() {
        return ensureLoaded(adjectives);
    }

    public static String[] getAdjectives() {
        return toStringArray(getAdjectivesAsList());
    }

    public static String getRandomAdjective() {
        return pickRandom(getAdjectivesAsList());
    }

    public static ArrayList<String> getAdverbsAsList() {
        return ensureLoaded(adverbs);
    }

    public static String[] getAdverbs() {
        return toStringArray(getAdverbsAsList());
    }

    public static String getRandomAdverb() {
        return pickRandom(getAdverbsAsList());
    }

    public static ArrayList<String> getConjunctivesAsList() {
        return ensureLoaded(conjunctives);
    }

    public static String[] getConjunctives() {
        return toStringArray(getConjunctivesAsList());
    }

    public static String getRandomConjunctive() {
        return pickRandom(getConjunctivesAsList());
    }

    public static ArrayList<String> getPrepositionsAsList() {
        return ensureLoaded(prepositions);
    }

    public static String[] getPrepositions() {
        return toStringArray(getPrepositionsAsList());
    }

    public static String getRandomPreposition() {
        return pickRandom(getPrepositionsAsList());
    }

    public static ArrayList<String> getPronounsAsList() {
        return ensureLoaded(pronouns);
    }

    public static String[] getPronouns() {
        return toStringArray(getPronounsAsList());
    }

    public static String getRandomPronoun() {
        return pickRandom(getPronounsAsList());
    }

    // Reads the default parts-of-speech file if `list` is still empty and
    // no such file has been read yet, then hands `list` back.
    private static ArrayList<String> ensureLoaded(ArrayList<String> list) {
        if (list.isEmpty() && !posFileLoaded) {
            readWordsPOSAndPron();
        }
        return list;
    }

    // Copies a word list into a String[]. A plain cast of toArray() to
    // String[] fails at run time because toArray() returns Object[].
    private static String[] toStringArray(List<String> list) {
        return list.toArray(new String[0]);
    }


    //////////////////////////////////////////////////////////////
    // Klingon words: klingonwords.txt

    /**
     * Returns the Klingon word list, reading klingonwords.txt on first use.
     */
    public static ArrayList<String> getKlingonWordsAsList() {
        if ((klingonWords == null) || klingonWords.isEmpty()) {
            readKlingonWords("klingonwords.txt");
        }
        return klingonWords;
    }

    /**
     * Returns the Klingon word list as an array.
     */
    public static String[] getKlingonWords() {
        return toStringArray(getKlingonWordsAsList());
    }

    /**
     * Replaces the Klingon word list with the trimmed lines of the given
     * file. On failure the exception is printed and the method returns;
     * if the file cannot be opened the existing list is left untouched.
     *
     * @param filename file with one word per line
     */
    static void readKlingonWords(String filename) {
        try (LineNumberReader lnr = new LineNumberReader(new FileReader(filename))) {
            // Reset only after the file has been opened successfully.
            klingonWords = new ArrayList<>();
            String line = lnr.readLine();
            while (line != null) {
                klingonWords.add(line.trim());
                line = lnr.readLine();
            }
        } catch (Exception e) {
            System.out.println(e);
            e.printStackTrace();
        }
    }


    //////////////////////////////////////////////////////////////
    // Scrabble words: we're using hashsets because the most common
    // use is to see if a word is in the set or not.

    /** Returns the words of scrabblewords.txt converted to lower case. */
    public static HashSet<String> getScrabbleWordsLowerCase() {
        return getScrabbleWords(true);
    }

    /**
     * Returns the words of scrabblewords.txt exactly as they appear in
     * the file (presumably upper case - verify against the data file).
     */
    public static HashSet<String> getScrabbleWordsUpperCase() {
        return getScrabbleWords(false);
    }

    /**
     * Reads the whitespace-separated words of scrabblewords.txt into a set.
     * If the file cannot be opened, the exception is printed and the JVM
     * exits with status 1.
     *
     * @param isLowerCase true to lower-case every word, false to keep the
     *                    file's own casing
     * @return the set of words
     */
    public static HashSet<String> getScrabbleWords(boolean isLowerCase) {
        try (Scanner scanner = new Scanner(new FileReader("scrabblewords.txt"))) {
            HashSet<String> scrabbleWords = new HashSet<>();
            while (scanner.hasNext()) {
                String s = scanner.next();
                // Locale.ROOT keeps the conversion independent of the
                // machine's default locale.
                scrabbleWords.add(isLowerCase ? s.toLowerCase(Locale.ROOT) : s);
            }
            return scrabbleWords;
        } catch (IOException e) {
            System.out.println(e);
            System.exit(1);
            return null;
        }
    }


    //////////////////////////////////////////////////////////////
    // Dictionary: words.txt

    /**
     * Reads the whitespace-separated words of the given file and returns
     * them after scrubbing (see scrub).
     */
    public static String[] getDictionary(String fileName) {
        return scrub(readDictionary(fileName, null));
    }

    /**
     * Reads every token of the given file.
     * If the file cannot be opened, the exception is printed and the JVM
     * exits with status 1.
     *
     * @param fileName file to read
     * @param pattern  token delimiter, or null for Scanner's default
     *                 (whitespace)
     * @return the tokens in file order
     */
    static String[] readDictionary(String fileName, Pattern pattern) {
        try (Scanner scanner = new Scanner(new FileReader(fileName))) {
            if (pattern != null) {
                scanner.useDelimiter(pattern);
            }
            // The number of words is not known in advance, so collect
            // them in a list and convert at the end.
            ArrayList<String> tokens = new ArrayList<>();
            while (scanner.hasNext()) {
                tokens.add(scanner.next());
            }
            return toStringArray(tokens);
        } catch (IOException e) {
            System.out.println(e);
            System.exit(1);
            return null;
        }
    }

    /**
     * Reads the given file split on single non-word characters and
     * returns the scrubbed tokens.
     */
    static String[] readWords(String fileName) {
        return scrub(readDictionary(fileName, NON_WORD));
    }

    /**
     * Removes words with caps, and single-letter (or empty) words.
     *
     * NOTE(review): the body of this method was lost in extraction and is
     * reconstructed from its leading comment. "Words with caps" is taken
     * to mean any upper-case character anywhere in the word; the original
     * may have tested only the first character - confirm. The input array
     * is not modified.
     *
     * @param words candidate words
     * @return a new array holding the surviving words in their original order
     */
    static String[] scrub(String[] words) {
        ArrayList<String> kept = new ArrayList<>(words.length);
        for (String w : words) {
            if ((w != null) && (w.length() > 1) && !hasUpperCase(w)) {
                kept.add(w);
            }
        }
        return toStringArray(kept);
    }

    // True if any character of `w` is an upper-case letter.
    private static boolean hasUpperCase(String w) {
        for (int i = 0; i < w.length(); i++) {
            if (Character.isUpperCase(w.charAt(i))) {
                return true;
            }
        }
        return false;
    }

    /**
     * Reads a parts-of-speech file, feeding each line to parsePOSAndPron.
     * On any failure the exception and its stack trace are printed and
     * the JVM exits with status 1.
     *
     * NOTE(review): everything before the catch clause was lost in
     * extraction and is reconstructed from the callers and from
     * parsePOSAndPron; the original visibility is unknown (public is the
     * choice that cannot break an existing caller), and skipping blank
     * lines is an addition - confirm.
     *
     * @param filename file with one "word-description+pronunciation"
     *                 entry per line
     */
    public static void readWordsPOSAndPron(String filename) {
        try (BufferedReader reader = new BufferedReader(new FileReader(filename))) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.trim().isEmpty()) {
                    continue;
                }
                parsePOSAndPron(line);
            }
            posFileLoaded = true;
        } catch (IOException | RuntimeException e) {
            System.out.println(e);
            e.printStackTrace();
            System.exit(1);
        }
    }

    /**
     * Splits one line of the form word-description+pronunciation at the
     * first '-' and the first '+', files the word under each category in
     * its description, and records its pronunciation. Words of length 0
     * or 1 are ignored. A line in which either separator is missing, or
     * in which '+' precedes '-', prints an error and exits the JVM with
     * status 1.
     *
     * @param line one line of a parts-of-speech file
     */
    static void parsePOSAndPron(String line) {
        int posIndex = line.indexOf('-');
        int pronIndex = line.indexOf('+');
        if ((posIndex < 0) || (pronIndex < posIndex)) {
            System.out.println("ERROR: posIndex=" + posIndex + " pronIndex=" + pronIndex);
            System.exit(1);
        }

        String word = line.substring(0, posIndex);
        if (word.length() <= 1) {
            return;
        }
        String desc = line.substring(posIndex + 1, pronIndex);
        String pron = line.substring(pronIndex + 1);

        // Words have already been scrubbed.
        classifyWord(word, desc);
        pronunciations.put(word, pron);
        allWords.add(word);
    }

    /**
     * Adds the word to every category whose tag letter occurs in the
     * description (tags are case-sensitive: "V" is verb, "v" is adverb).
     *
     * @param word the word to file
     * @param desc string of category tag letters
     */
    static void classifyWord(String word, String desc) {
        if (desc.contains("N")) {
            nouns.add(word);
        }
        if (desc.contains("p")) {
            pluralNouns.add(word);
        }
        if (desc.contains("V")) {
            verbs.add(word);
        }
        if (desc.contains("t")) {
            transVerbs.add(word);
        }
        if (desc.contains("i")) {
            inTransVerbs.add(word);
        }
        if (desc.contains("A")) {
            adjectives.add(word);
        }
        if (desc.contains("v")) {
            adverbs.add(word);
        }
        if (desc.contains("C")) {
            conjunctives.add(word);
        }
        if (desc.contains("P")) {
            prepositions.add(word);
        }
        if (desc.contains("r")) {
            pronouns.add(word);
        }
    }

    /**
     * Prints the number of words currently held in each category.
     */
    static void printAllSizes() {
        System.out.println("Nouns:" + nouns.size());
        System.out.println("Plural nouns:" + pluralNouns.size());
        System.out.println("Verbs:" + verbs.size());
        System.out.println("Transitive verbs:" + transVerbs.size());
        System.out.println("Intransitive verbs:" + inTransVerbs.size());
        System.out.println("Adjectives:" + adjectives.size());
        System.out.println("Adverbs:" + adverbs.size());
        System.out.println("Conjunctives:" + conjunctives.size());
        System.out.println("Prepositions:" + prepositions.size());
        System.out.println("Pronouns:" + pronouns.size());
    }

    /**
     * Prints every word of every category, one heading per category.
     */
    static void printAllWords() {
        printCategory("Nouns:", nouns);
        printCategory("\n\n Plural nouns:", pluralNouns);
        printCategory("\n\n Verbs:", verbs);
        printCategory("\n\n Transitive verbs:", transVerbs);
        printCategory("\n\n Intransitive verbs:", inTransVerbs);
        printCategory("\n\n Adjectives:", adjectives);
        printCategory("\n\n Adverbs:", adverbs);
        printCategory("\n\n Conjunctives:", conjunctives);
        printCategory("\n\n Prepositions:", prepositions);
        printCategory("\n\n Pronouns:", pronouns);
        System.out.println();
    }

    // Prints the heading followed by each word preceded by a space.
    private static void printCategory(String heading, List<String> words) {
        System.out.print(heading);
        for (String w : words) {
            System.out.print(" " + w);
        }
    }

    /**
     * Returns a uniformly chosen element of the list.
     *
     * @param wordList list to choose from
     * @return one element of the list
     * @throws IllegalArgumentException if the list is empty
     */
    static String pickRandom(ArrayList<String> wordList) {
        return wordList.get(RAND.nextInt(wordList.size()));
    }
}