diff --git a/src/vocabulary_creator.cpp b/src/vocabulary_creator.cpp index 63f7978..b3cee85 100644 --- a/src/vocabulary_creator.cpp +++ b/src/vocabulary_creator.cpp @@ -6,9 +6,21 @@ inline int omp_get_max_threads(){return 1;} inline int omp_get_thread_num(){return 0;} #endif #include +#include using namespace std; namespace fbow{ +/** + * Returns a random number in the range [min..max] + * @param min + * @param max + * @return random T number in [min..max] + */ +template +static T RandomValue(T min, T max) { + return ((T)rand() / (T)RAND_MAX) * (max - min) + min; +} + void VocabularyCreator::create(fbow::Vocabulary &Voc, const cv::Mat &features, const std::string &desc_name, Params params) { std::vector vfeatures(1); @@ -108,7 +120,7 @@ void VocabularyCreator::createLevel( int parent, int curL,bool recursive){ } //initialize clusters - auto centers=getInitialClusterCenters(findices ); + auto centers= initialClusterCentersKmpp(findices ); center_features.resize(centers.size()); for(size_t i=0;i VocabularyCreator::getInitialClusterCenters(const std::ve return centers; } +std::vector VocabularyCreator::initialClusterCentersKmpp(const std::vector &findices) +{ + // Implements kmeans++ seeding algorithm + // Algorithm: + // 1. Choose one center uniformly at random from among the data points. + // 2. For each data point x, compute D(x), the distance between x and the nearest + // center that has already been chosen. + // 3. Add one new data point as a center. Each point x is chosen with probability + // proportional to D(x)^2. + // 4. Repeat Steps 2 and 3 until k centers have been chosen. + // 5. Now that the initial centers have been chosen, proceed using standard k-means + // clustering. + + std::vector centers; + centers.reserve(_params.k); + for (auto fi : findices) _features(fi).m_Dist = std::numeric_limits::max(); + + // 1. + + uint32_t ifeature = findices[rand() % findices.size()]; + + // create first cluster + centers.push_back(ifeature); + + // compute the initial distances + auto last_center_feat = _features[centers.back()]; + for (auto fi : findices) { + auto &feature = _features(fi); + feature.m_Dist = dist_func(last_center_feat, _features[fi]); + } + + while ((int)centers.size() < _params.k) + { + last_center_feat = _features[centers.back()]; + for (auto fi : findices) { + auto &feature = _features(fi); + if(feature.m_Dist > 0.0f) + feature.m_Dist = std::min(feature.m_Dist, dist_func(last_center_feat, _features[fi])); + } + + double dist_sum = std::accumulate(findices.begin(), findices.end(), 0.0, [&](float acc, const unsigned fid) { return acc + _features(fid).m_Dist; }); + if (dist_sum > 0) + { + double cut_d; + do + { + cut_d = RandomValue(0, dist_sum); + } while (cut_d == 0.0); + + double d_up_now = 0; + std::vector::const_iterator dit; + for (dit = findices.begin(); dit != findices.end(); ++dit) + { + d_up_now += _features(*dit).m_Dist; + if (d_up_now >= cut_d) break; + } + + if (dit == findices.end()) + --dit; + + centers.push_back(*dit); + + } // if dist_sum > 0 + else + break; + + } // while(used_clusters < m_k) + + return centers; +} + std::size_t VocabularyCreator::vhash(const std::vector > & v_vec) { std::size_t seed = 0; diff --git a/src/vocabulary_creator.h b/src/vocabulary_creator.h index e8a24b0..58a6921 100644 --- a/src/vocabulary_creator.h +++ b/src/vocabulary_creator.h @@ -82,6 +82,7 @@ class FBOW_API VocabularyCreator void createLevel(const std::vector &findices, int parent=0, int curL=0); void createLevel(int parent=0, int curL=0, bool recursive=true); std::vector getInitialClusterCenters(const std::vector &findices); + std::vector initialClusterCentersKmpp(const std::vector &findices); std::size_t vhash(const std::vector >& v_vec) ; @@ -148,7 +149,6 @@ class FBOW_API VocabularyCreator struct Node{ Node(){} Node(uint32_t Id,uint32_t Parent,const cv::Mat &Feature, uint32_t Feat_idx=std::numeric_limits::max() ):id(Id),parent(Parent),feature(Feature),feat_idx(Feat_idx){ - } uint32_t id=std::numeric_limits::max();//id of this node in the tree