import java.util.*;
import java.io.*;

/**
 * A sentence (tweet text) together with its author and a timestamp string.
 *
 * <p>Instances can be built directly through the constructor or parsed from one
 * row of a comma-separated spreadsheet export with {@link #convertLine(String)}.
 */
public class SentencePart3 {

    /** Location of the word list read by {@link #getDict()}, one word per line. */
    private static final String DICTIONARY_PATH = "C:/words_alpha.txt";

    /** Minimum number of cells a row must have so that indices 2, 4 and 6 exist. */
    private static final int REQUIRED_COLUMNS = 7;

    /** Month names indexed by (month number - 1). */
    private static final String[] MONTH_NAMES = {
        "January", "February", "March", "April", "May", "June",
        "July", "August", "September", "October", "November", "December"
    };

    /** Lazily loaded word list; {@code null} until the first call to {@link #getDict()}. */
    private static Set<String> dictionary = null;

    private String text;
    private String author;
    private String timestamp;

    /**
     * Creates a sentence.
     *
     * @param text      the sentence text
     * @param author    the author of the sentence
     * @param timestamp the timestamp, stored as given
     */
    public SentencePart3(String text, String author, String timestamp) {
        this.text = text;
        this.author = author;
        this.timestamp = timestamp;
    }

    /**
     * Returns the word list, loading it from {@link #DICTIONARY_PATH} on first use.
     *
     * <p>Loading is best-effort: if the file cannot be read, the error is printed
     * and whatever was read so far (possibly nothing) is kept, so callers simply
     * see fewer known words. The load is attempted only once.
     *
     * @return the set of words read from the word list; never {@code null}
     */
    private static Set<String> getDict() {
        if (dictionary == null) {
            Set<String> loaded = new HashSet<>();
            // try-with-resources guarantees the file handle is released even on error.
            try (BufferedReader reader = new BufferedReader(new FileReader(new File(DICTIONARY_PATH)))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    loaded.add(line);
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
            dictionary = loaded;
        }
        return dictionary;
    }

    public void setText(String text) {
        this.text = text;
    }

    public String getText() {
        return text;
    }

    public void setAuthor(String author) {
        this.author = author;
    }

    public String getAuthor() {
        return author;
    }

    public void setTimestamp(String timestamp) {
        this.timestamp = timestamp;
    }

    public String getTimestamp() {
        return timestamp;
    }

    /**
     * Returns a brace-delimited description of the form
     * {@code {author:NAME, sentence:"TEXT", timestamp:"TIMESTAMP"}}.
     */
    @Override
    public String toString() {
        return "{author:" + author + ", sentence:\"" + text + "\", timestamp:\"" + timestamp + "\"}";
    }

    /**
     * Parses one spreadsheet row into a {@code SentencePart3}.
     *
     * <p>The row is split on commas. A double quote toggles "quoted" mode, inside
     * which commas are kept as part of the cell; the quote characters themselves
     * are dropped. For example
     *
     * <pre>
     * 0,31,4/19/2020 0:00,,RJIshak,"GlblCtzn, priyankachopra",Jakarta Capital Region,call leader help
     * </pre>
     *
     * yields the cells {@code 0}, {@code 31}, {@code 4/19/2020 0:00}, an empty cell,
     * {@code RJIshak}, {@code GlblCtzn, priyankachopra}, {@code Jakarta Capital Region}
     * and {@code call leader help}.
     *
     * <p>The date is taken from cell 2, the username from cell 4 and the tweet from
     * cell 6. The date is rewritten from {@code 4/19/2020 0:00} to {@code April 19 2020}.
     *
     * <p>NOTE(review): in the eight-cell example above, cell 6 is the location and the
     * tweet text is cell 7. The documented index 6 is kept here; confirm against the
     * real input file which column holds the tweet.
     *
     * @param line one row of the spreadsheet
     * @return the parsed sentence
     * @throws IllegalArgumentException if the row has fewer than seven cells or the
     *                                  date cell is not of the form {@code M/D/YYYY ...}
     */
    public static SentencePart3 convertLine(String line) {
        List<String> cells = splitCsvRow(line);
        if (cells.size() < REQUIRED_COLUMNS) {
            throw new IllegalArgumentException(
                "Expected at least " + REQUIRED_COLUMNS + " columns but found " + cells.size()
                    + " in row: " + line);
        }

        String date = formatDate(cells.get(2));
        String username = cells.get(4);
        String tweet = cells.get(6);
        return new SentencePart3(tweet, username, date);
    }

    /** Splits a row on commas, treating text between double quotes as a single cell. */
    private static List<String> splitCsvRow(String line) {
        List<String> cells = new ArrayList<>();
        StringBuilder cell = new StringBuilder();
        boolean insideQuotes = false;

        for (int i = 0; i < line.length(); i++) {
            char c = line.charAt(i);
            if (c == ',' && !insideQuotes) {
                // An unquoted comma ends the current cell.
                cells.add(cell.toString());
                cell.setLength(0);
            } else if (c == '"') {
                // Quotes only switch modes; they are not part of the cell content.
                insideQuotes = !insideQuotes;
            } else {
                cell.append(c);
            }
        }
        cells.add(cell.toString()); // the last cell has no trailing comma
        return cells;
    }

    /** Converts {@code "4/19/2020 0:00"} into {@code "April 19 2020"}. */
    private static String formatDate(String rawDate) {
        // Drop the time of day, then split the calendar date on '/'.
        String calendarDate = rawDate.trim().split(" ")[0];
        String[] parts = calendarDate.split("/");
        if (parts.length < 3) {
            throw new IllegalArgumentException(
                "Expected a date of the form M/D/YYYY but got: \"" + rawDate + "\"");
        }
        return monthName(parts[0]) + " " + parts[1] + " " + parts[2];
    }

    /** Maps {@code "1"}..{@code "12"} to a month name; any other value is returned unchanged. */
    private static String monthName(String month) {
        for (int i = 0; i < MONTH_NAMES.length; i++) {
            if (month.equals(String.valueOf(i + 1))) {
                return MONTH_NAMES[i];
            }
        }
        return month;
    }

    /**
     * Splits the text into words on single spaces.
     *
     * @return the words of the text, in order
     */
    public String[] splitSentence() {
        return this.text.split(" ");
    }

    /**
     * Splits the text into words on single spaces, merging two neighbouring words
     * into one whenever their concatenation appears in the word list.
     *
     * <p>Pairs are examined left to right; once two words are merged, the scan
     * resumes after the second of them, so a word is never used in two merges.
     *
     * @return the words of the text with dictionary-backed pairs joined
     */
    public String[] splitSentenceBigram() {
        Set<String> knownWords = getDict();
        String[] pieces = text.split(" ");
        List<String> result = new ArrayList<>(pieces.length);

        for (int i = 0; i < pieces.length; i++) {
            if (i + 1 < pieces.length) {
                String joined = pieces[i] + pieces[i + 1];
                if (knownWords.contains(joined)) {
                    result.add(joined);
                    i++; // the next word was consumed by the merge
                    continue;
                }
            }
            result.add(pieces[i]);
        }
        return result.toArray(new String[0]);
    }
}