|
||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object
  jncc20.ArffParser
class ArffParser
Implementation of ARFF parser, used to parse training and testing files. Remarks:
1) Variables are supposed to be either nominal or numerical. Unlike Weka, it does *not* manage variables of type String or Date.
2) It assumes the class of the problem to be named "class" in the Arff file and to be declared as the last variable in the header.
Field Summary | |
---|---|
private java.lang.String |
arffFileAddress
Absolute Path of the main Arff file |
private java.lang.String |
arffTestingFileAddress
Name of the testing Arff file |
private java.util.ArrayList<java.lang.String[]> |
categoryNames
Matrix of String with rows of different length, as different features (each row of the matrix corresponds to a different feature) can have different numbers of categories. |
private java.util.ArrayList<java.lang.String> |
classNames
Names of the output class. |
private java.lang.String |
datasetName
Dataset Name as read from the field "@relation" in the Arff file |
private double[][] |
discretizationIntervals
Matrix with rows of different length; stores the bin ranges for numerical features |
private java.util.ArrayList<java.lang.String> |
featureNames
Names of input features |
private java.util.ArrayList<java.lang.String> |
nonMarFeatureNamesTesting
Names of NonMar features in testing |
private java.util.ArrayList<java.lang.String> |
nonMarFeatureNamesTraining
Names of NonMar features in training |
private java.util.ArrayList<java.lang.Integer> |
notUsedFeatures
Indexes of features that are not used (because discretized into a single bin) |
private java.util.ArrayList<java.lang.Boolean> |
numFlags
Flags array indicating whether features are numerical (1) or not (0) |
private java.util.ArrayList<double[]> |
RawDataset
Copy of the data read from the Arff file, with -9999 as the marker for missing data and category names substituted by the corresponding indexes. |
private java.util.ArrayList<java.lang.String[]> |
rawTestingSet
Raw testing set exactly as read from file. |
private java.util.ArrayList<java.lang.Integer>[] |
rowsClassIdx
Indexes of the rows, in RawDataset, which have the same output class. |
private java.util.ArrayList<int[]> |
testingSet
Testing set, as accessed by the classifier: numerical variables discretized, category names substituted by their indexes, missing data marked as -9999, classes substituted with indexes. |
private java.util.ArrayList<java.lang.Integer> |
usedFeatures
Indexes of used features (i.e., categorical features and numerical features discretized into several bins) |
private java.lang.String |
validationMethod
Set either to "CV" or to the name of the testing Arff file |
private java.lang.String |
workingPath
Path where the files for the given experiment (Arff files, NonMar.txt) reside, and where the output will be saved |
Constructor Summary | |
---|---|
ArffParser(java.lang.String UserSuppliedWorkingPath,
java.lang.String UserSuppliedArffName,
java.lang.String UserSuppliedValidationMethod)
Initializes data members; then scans the Arff file, checking the formal correctness of variable declarations and the coherence of the data with the declarations; stores the information and the data loaded from file. |
Method Summary | |
---|---|
(package private) java.util.ArrayList<java.lang.String[]> |
getCategoryNames()
|
(package private) java.util.ArrayList<java.lang.String> |
getClassNames()
|
(package private) java.lang.String |
getDatasetName()
|
private int |
getDiscretizationIdx(java.lang.Double currentValue,
int FeatureIdx)
Returns the bin in which a numerical value of a given feature falls. |
(package private) java.util.ArrayList<java.lang.String> |
getFeatureNames()
|
(package private) java.util.ArrayList<java.lang.String> |
getNonMarFeatureNamesTesting()
|
(package private) java.util.ArrayList<java.lang.String> |
getNonMarFeatureNamesTraining()
|
(package private) java.util.ArrayList<java.lang.Boolean> |
getNumFlags()
|
(package private) java.util.ArrayList<double[]> |
getRawDataset()
|
(package private) java.util.ArrayList<java.lang.String[]> |
getRawTestingSet()
|
(package private) java.util.ArrayList<java.lang.Integer>[] |
getRowsClassIdx()
|
(package private) java.util.ArrayList<int[]> |
getTestingSet()
|
private void |
parseArffFile()
Scans the main Arff file. |
(package private) void |
parseTestingArffFile(boolean UnknownClasses)
Parses the testing file, checking that all declarations are coherent with those already loaded from the training Arff file; if the classes are unknown, it reads only the instances, without looking for the classes. |
private void |
readNonMar()
Reads the file NonMar.txt, containing the list of nonMar variables; if no file is found, all variables are assumed to be MAR. |
(package private) void |
setArffTestingFileAddress(java.lang.String suppliedArffTestingFileAddress)
|
(package private) void |
setDiscretizationIntervals(double[][] suppliedDiscretizationIntervals)
|
(package private) void |
setNotUsedFeatures(java.util.ArrayList<java.lang.Integer> suppliedNotUsedFeatures)
|
(package private) void |
setUsedFeatures(java.util.ArrayList<java.lang.Integer> suppliedUsedFeatures)
|
Methods inherited from class java.lang.Object |
---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Field Detail |
---|
private java.lang.String arffFileAddress
private java.lang.String arffTestingFileAddress
private java.util.ArrayList<java.lang.String[]> categoryNames
private java.util.ArrayList<java.lang.String> classNames
private java.lang.String datasetName
private double[][] discretizationIntervals
private java.util.ArrayList<java.lang.String> featureNames
private java.util.ArrayList<java.lang.String> nonMarFeatureNamesTesting
private java.util.ArrayList<java.lang.String> nonMarFeatureNamesTraining
private java.util.ArrayList<java.lang.Integer> notUsedFeatures
private java.util.ArrayList<java.lang.Boolean> numFlags
private java.util.ArrayList<double[]> RawDataset
private java.util.ArrayList<java.lang.String[]> rawTestingSet
private java.util.ArrayList<java.lang.Integer>[] rowsClassIdx
private java.util.ArrayList<int[]> testingSet
private java.util.ArrayList<java.lang.Integer> usedFeatures
private java.lang.String validationMethod
private java.lang.String workingPath
Constructor Detail |
---|
ArffParser(java.lang.String UserSuppliedWorkingPath, java.lang.String UserSuppliedArffName, java.lang.String UserSuppliedValidationMethod)
Method Detail |
---|
java.util.ArrayList<java.lang.String[]> getCategoryNames()
java.util.ArrayList<java.lang.String> getClassNames()
java.lang.String getDatasetName()
private int getDiscretizationIdx(java.lang.Double currentValue, int FeatureIdx)
java.util.ArrayList<java.lang.String> getFeatureNames()
java.util.ArrayList<java.lang.String> getNonMarFeatureNamesTesting()
java.util.ArrayList<java.lang.String> getNonMarFeatureNamesTraining()
java.util.ArrayList<java.lang.Boolean> getNumFlags()
java.util.ArrayList<double[]> getRawDataset()
java.util.ArrayList<java.lang.String[]> getRawTestingSet()
java.util.ArrayList<java.lang.Integer>[] getRowsClassIdx()
java.util.ArrayList<int[]> getTestingSet()
private void parseArffFile()
void parseTestingArffFile(boolean UnknownClasses)
private void readNonMar()
Then, puts the names of the NonMar variables in nonMarFeatureNamesTraining and nonMarFeatureNamesTesting.
void setArffTestingFileAddress(java.lang.String suppliedArffTestingFileAddress)
void setDiscretizationIntervals(double[][] suppliedDiscretizationIntervals)
void setNotUsedFeatures(java.util.ArrayList<java.lang.Integer> suppliedNotUsedFeatures)
void setUsedFeatures(java.util.ArrayList<java.lang.Integer> suppliedUsedFeatures)
|
||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |