mlpack  2.0.1
hoeffding_numeric_split.hpp
Go to the documentation of this file.
1 
16 #ifndef __MLPACK_METHODS_HOEFFDING_TREES_HOEFFDING_NUMERIC_SPLIT_HPP
17 #define __MLPACK_METHODS_HOEFFDING_TREES_HOEFFDING_NUMERIC_SPLIT_HPP
18 
19 #include <mlpack/core.hpp>
20 #include "numeric_split_info.hpp"
21 
22 namespace mlpack {
23 namespace tree {
24 
53 template<typename FitnessFunction,
54  typename ObservationType = double>
56 {
57  public:
60 
70  HoeffdingNumericSplit(const size_t numClasses,
71  const size_t bins = 10,
72  const size_t observationsBeforeBinning = 100);
73 
78  HoeffdingNumericSplit(const size_t numClasses,
79  const HoeffdingNumericSplit& other);
80 
90  void Train(ObservationType value, const size_t label);
91 
104  void EvaluateFitnessFunction(double& bestFitness, double& secondBestFitness)
105  const;
106 
108  size_t NumChildren() const { return bins; }
109 
114  void Split(arma::Col<size_t>& childMajorities, SplitInfo& splitInfo) const;
115 
117  size_t MajorityClass() const;
119  double MajorityProbability() const;
120 
122  size_t Bins() const { return bins; }
123 
125  template<typename Archive>
126  void Serialize(Archive& ar, const unsigned int /* version */);
127 
128  private:
130  arma::Col<ObservationType> observations;
132  arma::Col<size_t> labels;
133 
135  arma::Col<ObservationType> splitPoints;
137  size_t bins;
141  size_t samplesSeen;
142 
144  arma::Mat<size_t> sufficientStatistics;
145 };
146 
148 template<typename FitnessFunction>
149 using HoeffdingDoubleNumericSplit = HoeffdingNumericSplit<FitnessFunction,
150  double>;
151 
152 } // namespace tree
153 } // namespace mlpack
154 
155 // Include implementation.
156 #include "hoeffding_numeric_split_impl.hpp"
157 
158 #endif
size_t samplesSeen
The number of samples we have seen so far.
Linear algebra utility functions, generally performed on matrices or vectors.
size_t observationsBeforeBinning
The number of observations we must see before binning.
The HoeffdingNumericSplit class implements the numeric feature splitting strategy alluded to by Domingos and Hulten.
void Train(ObservationType value, const size_t label)
Train the HoeffdingNumericSplit on the given observed value (remember that this object only cares about the information for a single feature, not an entire point).
NumericSplitInfo< ObservationType > SplitInfo
The splitting information type required by the HoeffdingNumericSplit.
void Serialize(Archive &ar, const unsigned int)
Serialize the object.
void EvaluateFitnessFunction(double &bestFitness, double &secondBestFitness) const
Evaluate the fitness function given what has been calculated so far.
void Split(arma::Col< size_t > &childMajorities, SplitInfo &splitInfo) const
Return the majority class of each child to be created, if a split on this dimension was performed...
size_t NumChildren() const
Return the number of children if this node splits on this feature.
arma::Col< ObservationType > splitPoints
The split points for the binning (length bins - 1).
double MajorityProbability() const
Return the probability of the majority class.
Include all of the base components required to write MLPACK methods, and the main MLPACK Doxygen documentation.
size_t Bins() const
Return the number of bins.
arma::Col< ObservationType > observations
Before binning, this holds the points we have seen so far.
size_t MajorityClass() const
Return the majority class.
HoeffdingNumericSplit(const size_t numClasses, const size_t bins=10, const size_t observationsBeforeBinning=100)
Create the HoeffdingNumericSplit class, and specify some basic parameters about how the binning should take place.
arma::Col< size_t > labels
This holds the labels of the points before binning.
arma::Mat< size_t > sufficientStatistics
After binning, this contains the sufficient statistics.