AuthorSignature.java¶
public class AuthorSignature
{
// declare private instance variables here
private String authorName;
private double avgWordLength, differentWordRatio, hapaxRatio, avgWordsPerSentence, avgPhrasesPerSentence;
private static final double[] WEIGHT = {11.0, 33.0, 50.0, 0.4, 4.0};
public AuthorSignature(String authorName, double avgWordLength, double differentWordRatio, double hapaxRatio, double avgWordsPerSentence, double avgPhrasesPerSentence )
{
this.authorName = authorName;
this.avgWordLength = avgWordLength;
this.differentWordRatio = differentWordRatio;
this.hapaxRatio = hapaxRatio;
this.avgWordsPerSentence = avgWordsPerSentence;
this.avgPhrasesPerSentence = avgPhrasesPerSentence;
// "the job of the constructor is to initialize the private instance varables"
}
public double distanceTo(AuthorSignature a){
double[] unknownSet = a.getSignatureSet();
double[] thisSet = this.getSignatureSet();
double sum = 0.0;
for (int i = 0; i < 5; i++) {
sum += Math.abs(thisSet[i] - unknownSet[i]) * WEIGHT[i];
}
return sum;
}
// you'll need five more methods here
public String getName(){return authorName;}
public double getAvgWordLength(){return avgWordLength;}
public double getDifferentWordRatio(){return differentWordRatio;}
public double getHapaxRatio(){return hapaxRatio;}
public double getAvgWordsPerSentence(){return avgWordsPerSentence;}
public double getAvgPhrasesPerSentence(){return avgPhrasesPerSentence;}
public double[] getSignatureSet(){
double[] result = {avgWordLength, differentWordRatio, hapaxRatio, avgWordsPerSentence, avgPhrasesPerSentence};
return result;
}
}
AuthorshipDetection.java¶
import acm.program.*;
import java.util.ArrayList;
public class AuthorshipDetection extends ConsoleProgram
{
private static final String PUNCTUATION = "'!\",;:.-?)([]<>*#\n\t\r ";
private static final String VOWELS = "aeiouy";
private AuthorSignature[] authors;
public void run()
{
loadAuthorSignatures();
String filename = readLine("Enter file name: ");
String fileContents = FileHelper.getFileContents(filename);
ArrayList<String> sentences = getSentenceFromContents(fileContents);
ArrayList<String> words = new ArrayList<String>();
AuthorSignature unknownAuthor;
for (String sentence: sentences)
{
ArrayList<String> splitSentences = getWordsFromSentence(sentence);
for (String word: splitSentences)
{
words.add(word);
}
}
println("Sentences====" + sentences.size());
println("Words====" + words.size());
println();
double a = computerAverageWordLength(words);
println(" Avg Word Length = " + a);
double b = computeDifferentWordRatio(words);
println(" Diff word Ratio = " + b);
double c = computeHapaxLegomenaRatio(words);
println(" Hapax legomanana ratio = " + c);
double d = computeAverageWordsPerSentence(sentences);
println(" Avg words per sentence = " + d);
double e = computeSentenceComplexity(sentences);
println(" Sentence complexity = " + e);
unknownAuthor = new AuthorSignature("unknown", a, b, c, d, e);
println();
String champName = "";
double champScore = Integer.MAX_VALUE;
for (AuthorSignature author:authors){
double score = author.distanceTo(unknownAuthor);
if (score < champScore){
champScore = score;
champName = author.getName();
}
println(author.getName() + ": " + score);
}
println();
println("Predicted author = " + champName);
// task #1 goes here
}
// you'll do tasks #2 through #12 here
// I wrote this method for you
private void loadAuthorSignatures()
{
authors = new AuthorSignature[13];
authors[0] = new AuthorSignature("Agatha Christie", 4.40212537354, 0.103719383127, 0.0534892315963, 10.0836888743, 1.90662947161);
authors[1] = new AuthorSignature("Alexandre Dumas", 4.38235547477, 0.049677588873, 0.0212183996175, 15.0054854981, 2.63499369483);
authors[2] = new AuthorSignature("Brothers Grimm", 3.96868608302, 0.0529378997714, 0.0208217283571, 22.2267197987, 3.4129614094);
authors[3] = new AuthorSignature("Charles Dickens", 4.34760725241, 0.0803220950584, 0.0390662700499, 16.2613453121, 2.87721723105);
authors[4] = new AuthorSignature("Douglas Adams", 4.33408042189, 0.238435104414, 0.141554321967, 13.2874354561, 1.86574870912);
authors[5] = new AuthorSignature("Emily Bronte", 4.35858972311, 0.089662598104, 0.0434307152651, 16.1531664212, 2.93439550141);
authors[6] = new AuthorSignature("Fyodor Dostoevsky", 4.34066732195, 0.0528571428571, 0.0233414043584, 12.8108273249, 2.16705364781);
authors[7] = new AuthorSignature("James Joyce", 4.52346300961, 0.120109917189, 0.0682315429476, 10.9663296918, 1.79667373227);
authors[8] = new AuthorSignature("Jane Austen", 4.41553119311, 0.0563451817574, 0.02229943808, 16.8869087498, 2.54817097682);
authors[9] = new AuthorSignature("Lewis Caroll", 4.22709528497, 0.111591342227, 0.0537026953444, 16.2728740581, 2.86275565124);
authors[10] = new AuthorSignature("Mark Twain", 4.33272222298, 0.117254215021, 0.0633074228159, 14.3548573631, 2.43716268311);
authors[11] = new AuthorSignature("Sir Arthur Conan Doyle", 4.16808311494, 0.0822989796874, 0.0394458485444, 14.717564466, 2.2220872148);
authors[12] = new AuthorSignature("William Shakespeare", 4.16216957834, 0.105602561171, 0.0575348730848, 9.34707371975, 2.24620146314);
}
private ArrayList<String> getSentenceFromContents(String fileContents){
ArrayList<String> result = new ArrayList<String>();
int last = 0;
for (int i = 0; i < fileContents.length() - 1; i ++){
if (fileContents.substring(i, i+1).equals(".") ||
fileContents.substring(i, i+1).equals("?") ||
fileContents.substring(i, i+1).equals("!")){
result.add(fileContents.substring(last + 1, i));
last = i;
}
}
return result;
}
private ArrayList<String> getWordsFromSentence(String sentence)
{
ArrayList<String> result = new ArrayList<String>();
String[] splitSentence = sentence.split(" ");
for (int i=0; i<splitSentence.length; i++)
{
String cleaned = clean(splitSentence[i]);
if (cleaned.length() > 0){
result.add(cleaned);
}
}
return result;
}
private ArrayList<String> getAllWordsFromSetences(ArrayList<String> sentences)
{
ArrayList<String> result = new ArrayList<String>();
for(String sentence: sentences)
{
ArrayList<String> a = getWordsFromSentence(sentence);
for (String word: a){
result.add(word);
}
}
return result;
}
private String clean(String word)
{
word = word.toLowerCase();
if (word.length() == 0){
return "";
}
while (PUNCTUATION.indexOf(word.substring(0,1)) != -1)
{
word = word.substring(1);
if (word.length() == 0){
return "";
}
}
while (PUNCTUATION.indexOf(word.substring(word.length()-1)) != -1)
{
word = word.substring(0, word.length() - 1);
}
return word;
}
//calculation Methods
private double computerAverageWordLength(ArrayList<String> words)
{
int counter =0;
for (String word: words)
{
counter+=word.length();
}
return (1.0 * counter)/words.size();
}
private ArrayList<String> getUniqueWords(ArrayList<String> words){
//fill result arraylist with all unique strings in words
ArrayList<String> result = new ArrayList<String>();
for (String word: words){
if (!result.contains(word)){
result.add(word);
}
}
return result;
}
private double computeDifferentWordRatio(ArrayList<String> words){
//compute ratio of unique words to total words
ArrayList<String> uniqueWords = getUniqueWords(words);
return (1.0 * uniqueWords.size())/words.size();
}
private int frequency(ArrayList<String> words, String word ){
int counter = 0;
for (String test : words){
if (word.equals(test)){
counter++;
}
}
return counter;
}
private double computeHapaxLegomenaRatio(ArrayList<String> words){
//compute ratio of words that appear only once to total words
ArrayList<String> uniqueWords = getUniqueWords(words);
int counter = 0;
for (String word: uniqueWords){
if (frequency(words, word) == 1){
counter++;
}
}
return (1.0 * counter)/words.size();
}
private double computeAverageWordsPerSentence(ArrayList<String> sentences){
//compute average number of words per sentence
int counter = 0;
for (String sentence: sentences){
counter += getWordsFromSentence(sentence).size();
}
return (1.0 * counter)/sentences.size();
}
public ArrayList<String> getPhrasesFromSentence(String sentence) {
ArrayList<String> result = new ArrayList<String>();
int last = 0;
for (int i = 0; i < sentence.length() - 1; i ++){
if (sentence.substring(i, i+1).equals(",") ||
sentence.substring(i, i+1).equals(";") ||
sentence.substring(i, i+1).equals(":")){
result.add(sentence.substring(last + 1, i));
last = i;
}
}
result.add(sentence.substring(last + 1));
return result;
}
private double computeSentenceComplexity(ArrayList<String> sentences){
//compute average number of phrases per sentence
int counter = 0;
for (String sentence: sentences){
counter += getPhrasesFromSentence(sentence).size();
}
return (1.0 * counter)/sentences.size();
}
}
Last update:
June 5, 2023
Created: June 5, 2023
Created: June 5, 2023