00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028 #ifndef _terimber_mst_hpp_
00029 #define _terimber_mst_hpp_
00030
00031 #include "alg/mst.h"
00032 #include "base/stack.hpp"
00033
00034 BEGIN_TERIMBER_NAMESPACE
00035 #pragma pack(4)
00036
00037 template < class T >
00038 inline
00039 priority_queue< T >::priority_queue(byte_allocator& all, size_t N, const _vector< T >& pred, size_t dim) :
00040 _dim(dim), _Num(0), _pred(pred)
00041 {
00042 _pq.resize(all, N + 1, 0);
00043 _qp.resize(all, N + 1, 0);
00044 }
00045
00046 template < class T >
00047 inline
00048 bool
00049 priority_queue< T >::empty() const
00050 {
00051 return !_Num;
00052 }
00053
00054 template < class T >
00055 inline
00056 size_t
00057 priority_queue< T >::size() const
00058 {
00059 return _Num;
00060 }
00061
00062 template < class T >
00063 inline
00064 void
00065 priority_queue< T >::insert(size_t v)
00066 {
00067 _pq[++_Num] = v;
00068 _qp[v] = _Num;
00069 fixUp(_Num);
00070 }
00071
00072 template < class T >
00073 inline
00074 size_t
00075 priority_queue< T >::getmin()
00076 {
00077 exchange(1, _Num);
00078 fixDown(1, _Num - 1);
00079 return _pq[_Num--];
00080 }
00081
00082 template < class T >
00083 inline
00084 void
00085 priority_queue< T >::lower(size_t k)
00086 {
00087 fixUp(_qp[k]);
00088 }
00089
00090 template < class T >
00091 inline
00092 void
00093 priority_queue< T >::exchange(size_t i, size_t j)
00094 {
00095 size_t t = _pq[i];
00096 _pq[i] = _pq[j];
00097 _pq[j] = t;
00098
00099 _qp[_pq[i]] = i;
00100 _qp[_pq[j]] = j;
00101 }
00102
00103 template < class T >
00104 inline
00105 void
00106 priority_queue< T >::fixUp(size_t i)
00107 {
00108 size_t j;
00109 while (i > 1 && _pred[_pq[j = (i + _dim - 2) / _dim]] > _pred[_pq[i]])
00110 {
00111 exchange(i, j);
00112 i = j;
00113 }
00114 }
00115
00116 template < class T >
00117 inline
00118 void
00119 priority_queue< T >::fixDown(size_t k, size_t N)
00120 {
00121 size_t j;
00122 while ((j = _dim * (k - 1) + 2) <= N)
00123 {
00124 for (size_t i = j + 1; i < j + _dim && i <= N; ++i)
00125 {
00126 if (_pred[_pq[j]] > _pred[_pq[i]])
00127 j = i;
00128 }
00129
00130 if (_pred[_pq[k]] <= _pred[_pq[j]])
00131 break;
00132
00133 exchange(k, j);
00134 k = j;
00135 }
00136 }
00137
00139
00140 template < class T, class N >
00141 mst< T, N >::mst(const T& container, const N& notifier, byte_allocator& all, byte_allocator& temp) :
00142 _container(container), _notifier(notifier), _length(container.size())
00143 {
00144 _wt.resize(all, _length, -1.0);
00145 mst_edge info(-1, -1, -1.0);
00146 _fr.resize(all, _length, info);
00147 _mst.resize(all, _length, info);
00148
00149 for (size_t v = 0; v < _length; ++v)
00150 {
00151 if (_mst[v]._distance == -1.0)
00152 {
00153 pfs(v, temp);
00154 }
00155 }
00156 }
00157
00158
00159 template < class T, class N >
00160 inline
00161 const mst_vec_t&
00162 mst< T, N >::get_mst() const
00163 {
00164 return _mst;
00165 }
00166
00167 template < class T, class N >
00168 inline
00169 void
00170 mst< T, N >::pfs(size_t s, byte_allocator& tmp)
00171 {
00172 tmp.reset();
00173
00174 priority_queue< double > pQ(tmp, _length, _wt);
00175 pQ.insert(s);
00176
00177 size_t step = 0;
00178 char buf[128];
00179
00180 while (!pQ.empty())
00181 {
00182 size_t v = pQ.getmin();
00183 _mst[v] = _fr[v];
00184
00185 for (size_t w = 0; w < _length; ++w)
00186 {
00187 if (v == w)
00188 continue;
00189
00190 double distance;
00191
00192 if (_fr[w]._distance == -1.0)
00193 {
00194 distance = _container.distance(v, w);
00195
00196 _wt[w] = distance;
00197
00198 pQ.insert(w);
00199
00200 _fr[w]._from = v;
00201 _fr[w]._to = w;
00202 _fr[w]._distance = distance;
00203 }
00204 else if (_mst[w]._distance == -1.0
00205 && (distance = _container.distance(v, w)) < _wt[w])
00206 {
00207 _wt[w] = distance;
00208
00209 pQ.lower(w);
00210
00211 _fr[w]._from = v;
00212 _fr[w]._to = w;
00213 _fr[w]._distance = distance;
00214 }
00215 }
00216
00217 if (++step && !(step % 100))
00218 {
00219 str_template::strprint(buf, 128, "edges processed %d", step);
00220 _notifier.notify(buf);
00221 }
00222 }
00223 }
00224
00226
00227 template < class T, class N >
00228 cluster_processor< T, N >::cluster_processor(const T& container, const N& notifier, double max_vertex_distance, double max_cluster_distance, double avg_cluster_distance) :
00229 _container(container), _notifier(notifier)
00230 {
00231 cut(max_vertex_distance, max_cluster_distance, avg_cluster_distance);
00232 }
00233
00234 template < class T, class N >
00235 void
00236 cluster_processor< T, N >::cut(double max_vertex_distance, double max_cluster_distance, double avg_cluster_distance)
00237 {
00238 byte_allocator temp, lookup;
00239
00240 char buf[128];
00241
00242 str_template::strprint(buf, 128, "mst process has been started...");
00243 _notifier.notify(buf);
00244
00245
00246 mst< T, N > obj(_container, _notifier, temp, lookup);
00247
00248
00249 const mst_vec_t& edges = obj.get_mst();
00250
00251 str_template::strprint(buf, 128, "mst edges to be clustered: %d", edges.size());
00252 _notifier.notify(buf);
00253
00254
00255 mst_head_map_t priority_map;
00256
00257 for (mst_vec_t::const_iterator iter = edges.begin(); iter != edges.end(); ++iter)
00258 {
00259 cluster_info cinfo(-1, 0.0);
00260 _mst_map.insert(temp, *iter, cinfo);
00261
00262 mst_head_map_t::iterator piter = priority_map.find(iter->_from);
00263
00264 if (piter != priority_map.end())
00265 ++*piter;
00266 else
00267 priority_map.insert(temp, iter->_from, 1);
00268 }
00269
00270 _vector< mst_head_map_citer_t > priority_vector;
00271
00272 priority_vector.resize(temp, priority_map.size(), priority_map.end());
00273
00274 size_t pindex = 0;
00275 mst_head_map_t::const_iterator piter = priority_map.begin();
00276
00277 for (; piter != priority_map.end(); ++piter, ++pindex)
00278 priority_vector[pindex] = piter;
00279
00280 mst_head_compare_counts sorter;
00281 std::sort(priority_vector.begin(), priority_vector.end(), sorter);
00282
00283
00284 size_t step = 0;
00285 size_t ident = os_minus_one;
00286 cluster_map_t::iterator citer;
00287 cluster_mst_map_t::iterator miter;
00288
00289
00290 for (_vector< mst_head_map_citer_t >::const_iterator viter = priority_vector.begin(); viter != priority_vector.end(); ++viter)
00291 {
00292 mst_edge item(viter->key(), os_minus_one, 0.0);
00293
00294 cluster_mst_map_t::iterator fiter = _mst_map.lower_bound(item);
00295
00296 if (fiter == _mst_map.end()
00297 || fiter.key()._from != viter->key())
00298 continue;
00299
00300 size_t cluster_ident = fiter->_cluster_ident;
00301
00302 if (cluster_ident == os_minus_one)
00303 {
00304
00305 cluster_ident = ++ident;
00306 _list< size_t > info;
00307 citer = _clusters.insert(_all, cluster_ident, info).first;
00308
00309 citer->push_back(_all, fiter.key()._from);
00310
00311 fiter->_cluster_ident = cluster_ident;
00312 }
00313
00314 lookup.reset();
00315
00316 _stack< size_t > walkup_stack;
00317 walkup_stack.push(lookup, fiter.key()._from);
00318
00319 do
00320 {
00321 size_t i = walkup_stack.top();
00322 walkup_stack.pop();
00323
00324 mst_edge item(i, os_minus_one, 0.0);
00325
00326 miter = _mst_map.lower_bound(item);
00327
00328 size_t from;
00329
00330 while (miter != _mst_map.end()
00331 && (from = miter.key()._from) == i)
00332 {
00333 const mst_edge& r = miter.key();
00334
00335 if (r._distance <= max_vertex_distance
00336 && miter->_cluster_distance <= max_cluster_distance
00337 && (fiter.key()._to == r._to || miter->_cluster_ident == os_minus_one)
00338 )
00339 {
00340
00341 if (fiter.key()._to != r._to)
00342 {
00343 miter->_cluster_ident = cluster_ident;
00344 miter->_cluster_distance = fiter->_cluster_distance + r._distance;
00345 citer->push_back(_all, r._to);
00346 }
00347
00348
00349 walkup_stack.push(lookup, r._to);
00350 }
00351
00352 ++miter;
00353 }
00354 }
00355 while (!walkup_stack.empty());
00356
00357 if (++step && !(step % 100))
00358 {
00359 str_template::strprint(buf, 128, "objects processed %d", step);
00360 _notifier.notify(buf);
00361 }
00362 }
00363
00364 str_template::strprint(buf, 128, "processing orphans...");
00365 _notifier.notify(buf);
00366
00367
00368 size_t oindex = 0;
00369 size_t orphans = 0;
00370
00371 for (miter = _mst_map.begin(); miter != _mst_map.end(); ++miter, ++oindex)
00372 {
00373 size_t cluster_ident = miter->_cluster_ident;
00374
00375 if (cluster_ident != os_minus_one)
00376 continue;
00377
00378 size_t cindex = 0;
00379 double min_agg_distance = 1.0;
00380
00381
00382 for (cluster_map_t::iterator it = _clusters.begin(); it != _clusters.end(); ++it, ++cindex)
00383 {
00384 double davg = avg_distance(it, oindex);
00385
00386 if (davg < min_agg_distance)
00387 {
00388 cluster_ident = cindex;
00389 min_agg_distance = davg;
00390 citer = it;
00391 }
00392 }
00393
00394
00395 if (cluster_ident != -1 && min_agg_distance <= avg_cluster_distance)
00396 {
00397 miter->_cluster_ident = cluster_ident;
00398 miter->_cluster_distance = min_agg_distance;
00399 citer->push_back(_all, oindex);
00400 }
00401 else
00402 {
00403
00404 cluster_ident = ++ident;
00405 cluster_list_t info;
00406 cluster_map_t::iterator citer = _clusters.insert(_all, cluster_ident, info).first;
00407 citer->push_back(_all, oindex);
00408 }
00409
00410 if (++orphans && !(orphans % 100))
00411 {
00412 str_template::strprint(buf, 128, "orphans processed %d", orphans);
00413 _notifier.notify(buf);
00414 }
00415 }
00416
00417 str_template::strprint(buf, 128, "found orphans: %d", orphans);
00418 _notifier.notify(buf);
00419 }
00420
00421 template < class T, class N >
00422 inline
00423 const cluster_map_t&
00424 cluster_processor< T, N >::get_clusters() const
00425 {
00426 return _clusters;
00427 }
00428
00429 template < class T, class N >
00430 inline
00431 double
00432 cluster_processor< T, N >::avg_distance(cluster_map_t::const_iterator citer, size_t index) const
00433 {
00434 double ret = 0.0;
00435 size_t count = 0;
00436 for (cluster_list_t::const_iterator iter = citer->begin();
00437 iter != citer->end();
00438 ++iter, ++count)
00439 {
00440 ret += _container.distance(*iter, index);
00441 }
00442
00443 return ret / count;
00444 }
00445
00446 #pragma pack()
00447 END_TERIMBER_NAMESPACE
00448
00449 #endif