diff --git a/Python/rerf/rerfClassifier.py b/Python/rerf/rerfClassifier.py index 2c403a29..5bc697b2 100644 --- a/Python/rerf/rerfClassifier.py +++ b/Python/rerf/rerfClassifier.py @@ -44,7 +44,8 @@ class rerfClassifier(BaseEstimator, ClassifierMixin): The random combination of features to use: either "RerF", "Base", or "S-RerF". "RerF" randomly combines features for each `mtry`. Base is our implementation of Random Forest. "S-RerF" is structured RerF, - combining multiple features together in random patches. + combining multiple features together in random patches. "Graph-Node-MORF" + and "Graph-Edge-MORF" are MORF for graph-valued data. See Tomita et al. (2016) [#Tomita]_ for further details. n_estimators : int, optional (default: 500) Number of trees in forest. @@ -230,7 +231,7 @@ def fit(self, X, y): else: forestType = "binnedBaseTern" self.method_to_use_ = 1 - elif self.projection_matrix == "S-RerF": + elif self.projection_matrix in ["S-RerF", "Graph-Node-MORF", "Graph-Edge-MORF"]: if self.oob_score: warn( "OOB is not currently implemented for the S-RerF" @@ -241,7 +242,13 @@ def fit(self, X, y): self.oob_score = False forestType = "binnedBaseTern" # this should change - self.method_to_use_ = 2 + if self.projection_matrix == "S-RerF": + self.method_to_use_ = 2 + elif self.projection_matrix == "Graph-Node-MORF": + self.method_to_use_ = 3 + elif self.projection_matrix == "Graph-Edge-MORF": + self.method_to_use_ = 4 + # Check that image_height and image_width are divisors of # the num_features. This is the most we can do to # prevent an invalid value being passed in. 
diff --git a/packedForest/src/forestTypes/binnedTree/processingNodeBin.h b/packedForest/src/forestTypes/binnedTree/processingNodeBin.h
index f0e7a2e9..77c56b55 100644
--- a/packedForest/src/forestTypes/binnedTree/processingNodeBin.h
+++ b/packedForest/src/forestTypes/binnedTree/processingNodeBin.h
@@ -19,6 +19,7 @@
 #include
 #include
 #include
+#include <numeric>
 
 namespace fp{
 
@@ -81,7 +82,7 @@ namespace fp{
         inline void calcMtryForNode(std::vector& featuresToTry){
             featuresToTry.resize(fpSingleton::getSingleton().returnMtry());
             int methodToUse = fpSingleton::getSingleton().returnMethodToUse();
-            assert(methodToUse == 1 || methodToUse == 2);
+            assert(methodToUse == 1 || methodToUse == 2 || methodToUse == 3 || methodToUse == 4);
 
             switch(methodToUse){
                 case 1:{
@@ -92,6 +93,14 @@ namespace fp{
                     randMatImagePatch(featuresToTry, paramsRandMatImagePatch());
                     break;
                 }
+                case 3:{
+                    randMatGraphNodePatch(featuresToTry, paramsRandMatGraphNodePatch());
+                    break;
+                }
+                case 4:{
+                    randMatGraphEdgePatch(featuresToTry, paramsRandMatGraphEdgePatch());
+                    break;
+                }
             }
         }
 
@@ -175,6 +184,178 @@ namespace fp{
             }
         } // END randMatStructured
 
+        /**
+         * Draw distinct indices from [0, populationSize) using a partial
+         * Fisher-Yates shuffle driven by randNum.
+         *
+         * @param populationSize number of indices available.
+         * @param numToDraw      number of indices requested; values above
+         *                       populationSize are clamped to it.
+         * @return the drawn indices in draw order (empty when either
+         *         argument is not positive).
+         */
+        inline std::vector<int> sampleIndicesWithoutReplacement(int populationSize, int numToDraw)
+        {
+            if (populationSize <= 0 || numToDraw <= 0)
+            {
+                return std::vector<int>();
+            }
+            if (numToDraw > populationSize)
+            {
+                // Asking for more draws than there are indices would make the
+                // shuffle request an empty range from gen() and then read past
+                // the end of the pool.
+                numToDraw = populationSize;
+            }
+
+            std::vector<int> pool(populationSize);
+            std::iota(std::begin(pool), std::end(pool), 0);
+
+            for (int locationToMove = 0; locationToMove < numToDraw; locationToMove++)
+            {
+                // gen(n) is treated as a draw from [0, n), so randomPosition
+                // lands in [locationToMove, populationSize).
+                const int randomPosition = randNum->gen(populationSize - locationToMove) + locationToMove;
+                const int tempSwap = pool[locationToMove];
+                pool[locationToMove] = pool[randomPosition];
+                pool[randomPosition] = tempSwap;
+            }
+
+            pool.resize(numToDraw);
+            return pool;
+        } // END sampleIndicesWithoutReplacement
+
+        /**
+         * Pick how many nodes each of the mtry candidate projections samples.
+         *
+         * The patch-height bounds are reused as the bounds on the node count.
+         *
+         * @return one count per candidate, each drawn from
+         *         [patchHeightMin, patchHeightMax].
+         */
+        inline std::vector<int> paramsRandMatGraphNodePatch()
+        {
+            const int mtry = fpSingleton::getSingleton().returnMtry();
+            const int patchHeightMax = fpSingleton::getSingleton().returnPatchHeightMax();
+            const int patchHeightMin = fpSingleton::getSingleton().returnPatchHeightMin();
+
+            std::vector<int> numNodes(mtry);
+            for (int k = 0; k < mtry; k++)
+            {
+                // sample from [patchHeightMin, patchHeightMax]
+                numNodes[k] = randNum->gen(patchHeightMax - patchHeightMin + 1) + patchHeightMin;
+            }
+
+            return numNodes;
+        } // END paramsRandMatGraphNodePatch
+
+        /**
+         * Fill featuresToTry with node-sampled candidate projections.
+         *
+         * For candidate k, numNodes[k] distinct nodes are drawn from
+         * [0, imageWidth); every pair (a, b) of drawn nodes, taken in draw
+         * order, adds feature index a * imageWidth + b with weight 1.
+         *
+         * NOTE(review): the index arithmetic suggests each observation is a
+         * flattened adjacency matrix with imageWidth columns -- confirm.
+         * NOTE(review): a candidate that draws fewer than two nodes has no
+         * pairs and therefore receives no features -- confirm this is intended.
+         * NOTE(review): the element type of featuresToTry was lost from this
+         * copy of the patch; weightedFeature is assumed -- confirm against
+         * calcMtryForNode.
+         *
+         * @param featuresToTry candidate projections, one entry per mtry.
+         * @param numNodes      node count per candidate; must hold mtry entries.
+         */
+        inline void randMatGraphNodePatch(std::vector<weightedFeature> &featuresToTry, const std::vector<int> &numNodes)
+        {
+            const int mtry = fpSingleton::getSingleton().returnMtry();
+            assert(static_cast<int>(numNodes.size()) == mtry);
+
+            const int imageWidth = fpSingleton::getSingleton().returnImageWidth();
+
+            for (int k = 0; k < mtry; k++)
+            {
+                const std::vector<int> nodes = sampleIndicesWithoutReplacement(imageWidth, numNodes[k]);
+                const int nodeCount = static_cast<int>(nodes.size());
+
+                for (int i = 0; i < nodeCount; i++)
+                {
+                    for (int j = i + 1; j < nodeCount; j++)
+                    {
+                        featuresToTry[k].returnFeatures().push_back(nodes[i] * imageWidth + nodes[j]);
+                        featuresToTry[k].returnWeights().push_back(1);
+                    }
+                }
+            }
+        } // END randMatGraphNodePatch
+
+        /**
+         * Pick, for each of the mtry candidate projections, an anchor node and
+         * how many of its edges to sample.
+         *
+         * @return two rows of mtry entries each: row 0 holds the anchor node,
+         *         drawn with gen(imageHeight); row 1 holds the edge count,
+         *         drawn from [patchHeightMin, patchHeightMax].
+         */
+        inline std::vector<std::vector<int>> paramsRandMatGraphEdgePatch()
+        {
+            const int mtry = fpSingleton::getSingleton().returnMtry();
+            const int imageHeight = fpSingleton::getSingleton().returnImageHeight();
+            const int patchHeightMax = fpSingleton::getSingleton().returnPatchHeightMax();
+            const int patchHeightMin = fpSingleton::getSingleton().returnPatchHeightMin();
+
+            // for each patch: <node, number of edges>
+            std::vector<std::vector<int>> nodeNumEdges(2, std::vector<int>(mtry));
+            for (int k = 0; k < mtry; k++)
+            {
+                nodeNumEdges[0][k] = randNum->gen(imageHeight);
+                // sample from [patchHeightMin, patchHeightMax]
+                nodeNumEdges[1][k] = randNum->gen(patchHeightMax - patchHeightMin + 1) + patchHeightMin;
+            }
+
+            return nodeNumEdges;
+        } // END paramsRandMatGraphEdgePatch
+
+        /**
+         * Fill featuresToTry with edge-sampled candidate projections.
+         *
+         * For candidate k, nodeNumEdges[1][k] distinct column indices c are
+         * drawn from [0, imageWidth); each adds feature index
+         * nodeNumEdges[0][k] * imageWidth + c with weight 1.
+         *
+         * NOTE(review): the element type of featuresToTry was lost from this
+         * copy of the patch; weightedFeature is assumed -- confirm against
+         * calcMtryForNode.
+         *
+         * @param featuresToTry candidate projections, one entry per mtry.
+         * @param nodeNumEdges  row 0: anchor node per candidate; row 1: number
+         *                      of edges to sample per candidate.
+         */
+        inline void randMatGraphEdgePatch(std::vector<weightedFeature> &featuresToTry, const std::vector<std::vector<int>> &nodeNumEdges)
+        {
+            const int mtry = fpSingleton::getSingleton().returnMtry();
+
+            // nodeNumEdges holds 2 rows of mtry columns, so the per-candidate
+            // length lives on the inner vectors; the outer size is always 2.
+            assert(nodeNumEdges.size() == 2);
+            assert(static_cast<int>(nodeNumEdges[0].size()) == mtry);
+            assert(static_cast<int>(nodeNumEdges[1].size()) == mtry);
+
+            const int imageWidth = fpSingleton::getSingleton().returnImageWidth();
+
+            for (int k = 0; k < mtry; k++)
+            {
+                const int rowOffset = nodeNumEdges[0][k] * imageWidth;
+                const std::vector<int> columns = sampleIndicesWithoutReplacement(imageWidth, nodeNumEdges[1][k]);
+
+                for (const int column : columns)
+                {
+                    featuresToTry[k].returnFeatures().push_back(rowOffset + column);
+                    featuresToTry[k].returnWeights().push_back(1);
+                }
+            }
+        } // END randMatGraphEdgePatch
 
         inline void resetLeftNode(){
             propertiesOfLeftNode.resetClassTotals();
diff --git a/packedForest/src/fpSingleton/fpInfo.h b/packedForest/src/fpSingleton/fpInfo.h
index 26ec4ef4..b8921e72 100644
--- a/packedForest/src/fpSingleton/fpInfo.h
+++ b/packedForest/src/fpSingleton/fpInfo.h
@@ -324,8 +324,8 @@ namespace fp {
                 useRowMajor = (bool)parameterValue;
             }else if(parameterName == "methodToUse"){
                 methodToUse = parameterValue;
-                if(!(methodToUse == 1 || methodToUse == 2)){
-                    throw std::runtime_error("methodToUse outside allowable parameters {1,2}.");
+                if(!(methodToUse == 1 || methodToUse == 2 || methodToUse == 3 || methodToUse == 4)){
+                    throw std::runtime_error("methodToUse outside allowable parameters {1,2,3,4}.");
                 }
             }else if(parameterName == "imageHeight"){
                 imageHeight = parameterValue;