Answer To: This is a Java assignment. A part of it was done by your tutor last...
Valupadasu answered on Apr 08 2021
ActualDataSet.java
ActualDataSet.java
/**
 * A dataset that physically holds its data: every cell value is stored in a
 * matrix owned by this object.
 *
 * @author Mehrdad Sabetzadeh, University of Ottawa
 * @author Guy-Vincent Jourdan, University of Ottawa
 *
 */
public class ActualDataSet extends DataSet {

    /**
     * Cell values, addressed as matrix[row][column]. This is the very array
     * handed out by the reader's getData(); it is not copied.
     */
    private final String[][] matrix;

    /**
     * Identifier of the data source as reported by the reader (for a file-based
     * source this is the name and location of the file).
     */
    private final String dataSourceId;

    /**
     * Builds an actual dataset from everything the reader exposes: source id,
     * dimensions, data matrix and attribute names. The inherited "attributes"
     * array is populated as the last step, once the dimensions and the matrix
     * are in place.
     *
     * @param reader is the DataReader instance to read data from.
     */
    public ActualDataSet(DataReader reader) {
        dataSourceId = reader.getSourceId();
        numAttributes = reader.getNumberOfColumns();
        numRows = reader.getNumberOfDataRows();
        matrix = reader.getData();
        String[] names = reader.getAttributeNames();
        createAttributes(names);
    }

    /**
     * Looks up a single cell of the data matrix.
     *
     * @param row            is the row (datapoint) number
     * @param attributeIndex is the column number
     * @return the stored value, or null when either coordinate is out of range
     */
    public String getValueAt(int row, int attributeIndex) {
        boolean rowInRange = row >= 0 && row < numRows;
        boolean columnInRange = attributeIndex >= 0 && attributeIndex < numAttributes;
        if (rowInRange && columnInRange) {
            return matrix[row][attributeIndex];
        }
        return null;
    }

    /**
     * @return the identifier of the source this dataset was read from.
     */
    public String getSourceId() {
        return dataSourceId;
    }

    /**
     * Fills the "attributes" array with one Attribute per column. A column is
     * typed NUMERIC when Util.isArrayNumeric accepts its unique values and
     * NOMINAL otherwise. The unique values come from getUniqueAttributeValues,
     * which is not defined in this class (presumably inherited from DataSet).
     *
     * @param attributeNames holds the name of each column, indexed by column
     *                       number
     */
    private void createAttributes(String[] attributeNames) {
        attributes = new Attribute[numAttributes];
        int column = 0;
        while (column < numAttributes) {
            String[] uniqueValues = getUniqueAttributeValues(column);
            AttributeType kind = Util.isArrayNumeric(uniqueValues)
                    ? AttributeType.NUMERIC
                    : AttributeType.NOMINAL;
            attributes[column] = new Attribute(attributeNames[column], column, kind, uniqueValues);
            column++;
        }
    }

    /**
     * Wraps this (actual) dataset in a virtual dataset that selects every row.
     *
     * @return a virtual dataset spanning the entire data in this (actual) dataset
     */
    public VirtualDataSet toVirtual() {
        int[] allRows = new int[numRows];
        int next = 0;
        while (next < allRows.length) {
            allRows[next] = next; // identity mapping: virtual row k is actual row k
            next++;
        }
        return new VirtualDataSet(this, allRows, attributes, "");
    }

    /**
     * Produces a one-line summary (source id and dimensions) followed, on a new
     * line, by whatever the superclass toString() prints.
     *
     * @return a string representation of this (actual) dataset.
     */
    public String toString() {
        StringBuilder text = new StringBuilder();
        text.append("Actual dataset (").append(getSourceId()).append(") with ");
        text.append(numAttributes).append(" attribute(s) and ");
        text.append(numRows).append(" row(s)");
        text.append(System.lineSeparator());
        text.append(super.toString());
        return text.toString();
    }
}
Attribute.java
Attribute.java
/**
 * Describes one attribute (column) of a dataset: its name, the data-matrix
 * column it refers to, its type, and the set of values it can take. The same
 * class serves actual datasets (with a data matrix) and virtual ones (without).
 *
 * @author Mehrdad Sabetzadeh, University of Ottawa
 * @author Guy-Vincent Jourdan, University of Ottawa
 *
 */
public class Attribute {

    /** Name label of the attribute. */
    private final String name;

    /** Column number of the attribute in the data matrix. */
    private final int absoluteIndex;

    /** Type of the attribute. */
    private final AttributeType type;

    /** Value set of the attribute; always a private copy, never a caller's array. */
    private String[] values;

    /**
     * Creates an attribute. The supplied value array is copied, so later changes
     * to the caller's array do not affect this attribute.
     *
     * @param name          is the name label of the attribute
     * @param absoluteIndex is the column number where the attribute is located in
     *                      a data matrix. For virtual datasets the position of an
     *                      attribute inside a dataset's "attributes" array says
     *                      nothing about the matrix column it stands for, which
     *                      is why the column number is stored explicitly.
     * @param type          is the type of the attribute
     * @param values        is the set of unique values that the attribute can
     *                      assume
     */
    public Attribute(String name, int absoluteIndex, AttributeType type, String[] values) {
        this.name = name;
        this.absoluteIndex = absoluteIndex;
        this.type = type;
        this.values = values.clone();
    }

    /**
     * @return the name of the attribute
     */
    public String getName() {
        return name;
    }

    /**
     * @return the absolute index (column number) of the attribute
     */
    public int getAbsoluteIndex() {
        return absoluteIndex;
    }

    /**
     * @return the type of the attribute
     */
    public AttributeType getType() {
        return type;
    }

    /**
     * @return a copy of the attribute's value set; modifying the returned array
     *         leaves the attribute untouched.
     */
    public String[] getValues() {
        return values.clone();
    }

    /**
     * Swaps the attribute's value set for a copy of the supplied array, so that
     * no outside object keeps a reference to the array stored here.
     *
     * @param newValues is an array containing the value set that should replace
     *                  the attribute's current value set
     */
    public void replaceValues(String[] newValues) {
        this.values = newValues.clone();
    }

    /**
     * Creates an independent copy of the attribute. Only the values array needs
     * duplicating (the constructor does that); the remaining fields are a
     * primitive, an enum constant and an immutable string.
     *
     * @return a (deep) copy of the attribute
     */
    @Override
    public Attribute clone() {
        return new Attribute(name, absoluteIndex, type, values);
    }

    /**
     * Renders the attribute as its absolute index, name, type and value set.
     * Nominal values are wrapped in single quotes; numeric values are printed
     * bare.
     *
     * @return a string representation of the attribute
     */
    @Override
    public String toString() {
        final boolean numeric = (type == AttributeType.NUMERIC);
        StringBuilder out = new StringBuilder();
        out.append(" [absolute index: ").append(absoluteIndex).append("] ");
        out.append(name);
        out.append(numeric ? " (numeric): " : " (nominal): ");
        out.append('{');
        String separator = "";
        for (String value : values) {
            out.append(separator);
            if (numeric) {
                out.append(value);
            } else {
                out.append('\'').append(value).append('\'');
            }
            separator = ", ";
        }
        out.append('}');
        return out.toString();
    }
}
AttributeType.java
AttributeType.java
/**
 * The kinds of attribute a dataset column can have.
 *
 * @author Mehrdad Sabetzadeh, University of Ottawa
 * @author Guy-Vincent Jourdan, University of Ottawa
 *
 */
public enum AttributeType {

    /** Non-numeric attribute; Attribute.toString() prints its values in single quotes. */
    NOMINAL,

    /** Numeric attribute; Attribute.toString() prints its values without quotes. */
    NUMERIC
}
CSVReader.java
CSVReader.java
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;
/**
 * This class provides an implementation of the DataReader interface for CSV
 * files.
 *
 * The first non-blank line of the file is the header (attribute names); every
 * later non-blank line is one data row. Values are separated by commas, and a
 * value may be wrapped in single quotes so that it can contain a comma. Quote
 * characters themselves are dropped and every value is trimmed.
 *
 * @author Mehrdad Sabetzadeh, University of Ottawa
 * @author Guy-Vincent Jourdan, University of Ottawa
 *
 */
public class CSVReader implements DataReader {

    /**
     * The delimiter that separates attribute names and attribute values
     */
    private static final char DELIMITER = ',';

    /**
     * Character allowing escape sequences containing the delimiter
     */
    private static final char QUOTE_MARK = '\'';

    /**
     * Number of attributes (columns), i.e. the number of fields in the header
     */
    private final int numColumns;

    /**
     * Number of data points (data rows), i.e. non-blank lines after the header
     */
    private final int numRows;

    /**
     * Path of the CSV file this reader was built from
     */
    private final String filePath;

    /**
     * Attribute names, one per column
     */
    private final String[] attributeNames;

    /**
     * Data matrix, addressed as matrix[row][column]
     */
    private final String[][] matrix;

    /**
     * Constructs a reader by loading a CSV file. The file is read once; blank
     * lines are ignored. A file without any non-blank line yields zero columns
     * and zero rows.
     *
     * Every cell of the resulting matrix is non-null: a row that has fewer
     * values than the header has columns is padded with empty strings.
     *
     * @param filePath is the name (and location) of the CSV file
     * @throws Exception if the file cannot be opened or read, or (as an
     *                   IllegalArgumentException) if a data row carries a
     *                   non-empty value beyond the last header column
     */
    public CSVReader(String filePath) throws Exception {
        this.filePath = filePath;

        List<String[]> records = readRecords(filePath);
        this.attributeNames = records.isEmpty() ? new String[0] : records.get(0);
        this.numColumns = attributeNames.length;
        this.numRows = Math.max(0, records.size() - 1);

        this.matrix = new String[numRows][numColumns];
        for (int row = 0; row < numRows; row++) {
            // records.get(0) is the header, so data row k lives at index k + 1
            fillRow(matrix[row], records.get(row + 1), row + 1);
        }
    }

    /**
     * Reads the file and splits each non-blank line into its fields.
     *
     * @param filePath is the file to read
     * @return one String[] per non-blank line, in file order (header first)
     * @throws Exception if the file cannot be opened or an I/O error occurs
     *                   while reading it
     */
    private static List<String[]> readRecords(String filePath) throws Exception {
        List<String[]> records = new ArrayList<>();
        // try-with-resources closes the scanner even when parsing fails
        try (Scanner scanner = new Scanner(new File(filePath))) {
            while (scanner.hasNextLine()) {
                String line = scanner.nextLine();
                if (!line.trim().isEmpty()) {
                    records.add(splitLine(line));
                }
            }
            // Scanner swallows read errors and simply stops; surface them
            // instead of silently returning a truncated dataset.
            IOException failure = scanner.ioException();
            if (failure != null) {
                throw failure;
            }
        }
        return records;
    }

    /**
     * Splits one line into trimmed fields. A delimiter inside single quotes is
     * part of the field; the quote characters are not. The number of fields is
     * always the number of unquoted delimiters plus one, so an empty final
     * field is reported as "" rather than being dropped.
     *
     * @param line is a non-null line of the file
     * @return the fields of the line, in order
     */
    private static String[] splitLine(String line) {
        List<String> fields = new ArrayList<>();
        StringBuilder current = new StringBuilder();
        boolean inQuote = false;
        for (int i = 0; i < line.length(); i++) {
            char ch = line.charAt(i);
            if (ch == QUOTE_MARK) {
                inQuote = !inQuote;
            } else if (ch == DELIMITER && !inQuote) {
                fields.add(current.toString().trim());
                current.setLength(0);
            } else {
                current.append(ch);
            }
        }
        fields.add(current.toString().trim()); // last field, possibly empty
        return fields.toArray(new String[0]);
    }

    /**
     * Copies the parsed fields of one data row into its matrix row. Missing
     * trailing values become ""; surplus values are tolerated only when they
     * are empty (e.g. a stray trailing delimiter).
     *
     * @param target        is the matrix row to fill (length = number of columns)
     * @param fields        are the parsed fields of the line
     * @param dataRowNumber is the 1-based data row number, used for error
     *                      reporting only
     * @throws IllegalArgumentException if a surplus value is non-empty
     */
    private void fillRow(String[] target, String[] fields, int dataRowNumber) {
        for (int col = target.length; col < fields.length; col++) {
            if (!fields[col].isEmpty()) {
                throw new IllegalArgumentException("Data row " + dataRowNumber + " of " + filePath + " has "
                        + fields.length + " value(s) but the header declares only " + target.length
                        + " column(s)");
            }
        }
        for (int col = 0; col < target.length; col++) {
            target[col] = col < fields.length ? fields[col] : "";
        }
    }

    /**
     * @return the attribute names; this is the reader's own array, not a copy
     */
    public String[] getAttributeNames() {
        return attributeNames; // no clone
    }

    /**
     * @return the data matrix; this is the reader's own array, not a copy
     */
    public String[][] getData() {
        return matrix; // no clone
    }

    /**
     * @return the path of the CSV file, exactly as passed to the constructor
     */
    public String getSourceId() {
        return filePath;
    }

    /**
     * @return the number of columns found in the header line
     */
    public int getNumberOfColumns() {
        return numColumns;
    }

    /**
     * @return the number of data rows (non-blank lines after the header)
     */
    public int getNumberOfDataRows() {
        return numRows;
    }
}
DataReader.java
DataReader.java
/**
 * Contract for anything that can supply a raw (unprocessed) dataset: where it
 * came from, how big it is, what its columns are called, and the data itself.
 *
 * @author Mehrdad Sabetzadeh, University of Ottawa
 * @author Guy-Vincent Jourdan, University of Ottawa
 *
 */
public interface DataReader {

    /**
     * @return a string identifier for the data source (for example, the name and
     *         location of the data source if the source happens to be a file).
     */
    String getSourceId();

    /**
     * @return the names of the dataset's attributes
     */
    String[] getAttributeNames();

    /**
     * @return the number of columns in the dataset
     */
    int getNumberOfColumns();

    /**
     * @return the number of rows (datapoints) in the dataset
     */
    int getNumberOfDataRows();

    /**
     * @return the data matrix of the dataset
     */
    String[][] getData();
}
DataSet.java
DataSet.java
/**
* This abstract class factors out code that is common to both actual and
* virtual datasets
*
* @author Mehrdad Sabetzadeh, University of Ottawa
* @author Guy-Vincent Jourdan, University of Ottawa
*
*/
public abstract class DataSet {
/**
* the number of attributes in this dataset
*/
protected int numAttributes;
/**
* the number of datapoints in this dataset
*/
protected int numRows;
/**
* array of attributes. Notice that in A2 and A3, attributes are no longer
* represented as name labels (strings). Instead, an attribute is an instance of
* the Attribute class.
*/
protected Attribute[] attributes;
/**
 * Reports how many attributes this dataset has.
 *
 * @return the value of the numAttributes field
 */
public int getNumberOfAttributes() {
    return this.numAttributes;
}
/**
 * Reports how many datapoints (rows) this dataset has.
 *
 * @return the value of the numRows field
 */
public int getNumberOfDatapoints() {
    return this.numRows;
}
/**
 * Looks an attribute up by name and hands back a COPY of it (made with
 * Attribute.clone()), so callers cannot alter the dataset's own Attribute
 * instance. The first numAttributes entries of the "attributes" array are
 * scanned in order and the first exact name match wins.
 *
 * @param attributeName is the name of the attribute of interest
 * @return (copy of) the Attribute instance whose name equals attributeName, or
 *         null when attributeName is null or no attribute has that name
 */
public Attribute getAttribute(String attributeName) {
    if (attributeName != null) {
        int position = 0;
        while (position < numAttributes) {
            Attribute candidate = attributes[position];
            if (candidate.getName().equals(attributeName)) {
                return candidate.clone();
            }
            position++;
        }
    }
    return null;
}
/**
 * Hands back a COPY (made with Attribute.clone()) of the attribute stored at
 * the given position of the "attributes" array.
 *
 * @param attributeIndex is the index of the attribute of interest
 * @return (copy of) the Attribute instance at position attributeIndex, or null
 *         when the index is negative or not below the number of attributes
 */
public Attribute getAttribute(int attributeIndex) {
    boolean inRange = attributeIndex >= 0 && attributeIndex < numAttributes;
    return inRange ? attributes[attributeIndex].clone() : null;
}
/**
* Finds the index of the attribute with a given name
*
* @param attributeName is the name of the attribute of interest
...