00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028 #include "smart/varmap.h"
00029
00030 #ifndef _terimber_varmap_hpp_
00031 #define _terimber_varmap_hpp_
00032
00033 BEGIN_TERIMBER_NAMESPACE
00034 #pragma pack(4)
00035
00037 inline
00038 bool
00039 main_map_key::operator<(const main_map_key& x) const
00040 {
00041 return str_template::strnocasecmp(_var_res._key._res ? _var_res._key._res : "", x._var_res._key._res ? x._var_res._key._res : "", os_minus_one) < 0;
00042 }
00043
00044 inline
00045 bool
00046 main_map_key::partial_match(const main_map_key& x) const
00047 {
00048 return !str_template::strnocasecmp(_var_res._key._res ? _var_res._key._res : "",
00049 x._var_res._key._res ? x._var_res._key._res : "",
00050 x._var_res._key._res ? strlen(x._var_res._key._res) : 0);
00051 }
00052
00053 template < class T, class C >
00054 size_t
00055 varmap< T, C >::remove_all_resource(const T& x)
00056 {
00057 size_t ret = 0;
00058
00059 for (mainmap_iter_t miter = _mainmap.begin(); miter != _mainmap.end();)
00060 {
00061
00062 TYPENAME mainmap_object_t::iterator iter = miter->find(x);
00063 if (iter != miter->end())
00064 {
00065 ++ret;
00066
00067 miter->erase(_main_object_allocator, iter);
00068 }
00069
00070 if (miter->empty())
00071 {
00072 var_value val;
00073
00074 if (_partial || _fuzzy)
00075 {
00076 val._not_null = true;
00077 val._value.strVal = miter.key()._var_res._key._res;
00078 _factory.destroy(_type, val);
00079 val._value.bufVal = (const ub1_t*)miter.key()._var_res._key._offsets;
00080 if (val._value.bufVal)
00081 _factory.destroy(vt_numeric, val);
00082 }
00083 else
00084 {
00085 val = miter.key()._var_res._val;
00086 _factory.destroy(_type, val);
00087 }
00088
00089 miter = _mainmap.erase(miter);
00090 }
00091 else
00092 ++miter;
00093 }
00094
00095 return ret;
00096 }
00097
00099 template < class T, class C >
00100 varmap< T, C >::varmap(vt_types type, bool partial, bool fuzzy, variant_factory& factory) :
00101 _type(type), _partial(partial), _fuzzy(fuzzy), _compare(type, partial, fuzzy), _mainmap(_compare), _factory(factory)
00102 {
00103 assert(!_partial || !_fuzzy);
00104 }
00105
00106 template < class T, class C >
00107 varmap< T, C >::~varmap()
00108 {
00109 clear();
00110 }
00111
00112 template < class T, class C >
00113 TYPENAME varmap< T, C >::mainmap_iter_t
00114 varmap< T, C >::end()
00115 {
00116 return _mainmap.end();
00117 }
00118
00119 template < class T, class C >
00120 bool
00121 varmap< T, C >::find_exact_resource(const main_map_key& res, C& x, const T& min_filter, const T& max_filter) const
00122 {
00123 bool ret = false;
00124
00125 mainmap_citer_t ifind = _mainmap.find(res);
00126 if (ifind != _mainmap.end())
00127 {
00128
00129 ret |= add_unique_items(ifind, x, min_filter, max_filter);
00130 }
00131
00132 return ret;
00133 }
00134
00135
00136
00137 template < class T, class C >
00138 bool
00139 varmap< T, C >::find_less_resource(const main_map_key& res, C& x, const T& min_filter, const T& max_filter, bool boundary_include) const
00140 {
00141 bool ret = false;
00142
00143
00144
00145
00146
00147 mainmap_citer_t istart = _mainmap.begin();
00148 mainmap_citer_t iend = boundary_include ? _mainmap.upper_bound(res) : _mainmap.lower_bound(res);
00149
00150 while (istart != iend)
00151 {
00152
00153 ret |= add_unique_items(istart, x, min_filter, max_filter);
00154 ++istart;
00155 ret = true;
00156 }
00157
00158 return ret;
00159 }
00160
00161
00162 template < class T, class C >
00163 bool
00164 varmap< T, C >::find_greater_resource(const main_map_key& res, C& x, const T& min_filter, const T& max_filter, bool boundary_include) const
00165 {
00166 bool ret = false;
00167
00168
00169
00170
00171
00172 mainmap_citer_t istart = _mainmap.end();
00173 mainmap_citer_t iend = _mainmap.end();
00174
00175 if (boundary_include)
00176 istart = _mainmap.find(res);
00177
00178 if (istart == _mainmap.end())
00179 istart = _mainmap.upper_bound(res);
00180
00181 while (istart != iend)
00182 {
00183
00184 ret |= add_unique_items(istart, x, min_filter, max_filter);
00185 ++istart;
00186 }
00187
00188 return ret;
00189 }
00190
00191 template < class T, class C >
00192 bool
00193 varmap< T, C >::intersect_exact_resource(const main_map_key& res, C& x, const T& min_filter, const T& max_filter) const
00194 {
00195
00196 mainmap_citer_t ifind = _mainmap.find(res);
00197 if (ifind != _mainmap.end())
00198 {
00199
00200 remove_uncommon_items(ifind, x, min_filter, max_filter);
00201 }
00202 else
00203 x._container.clear();
00204
00205 return !x._container.empty();
00206 }
00207
00208
00209
00210 template < class T, class C >
00211 bool
00212 varmap< T, C >::intersect_less_resource(const main_map_key& res, C& x, const T& min_filter, const T& max_filter, bool boundary_include) const
00213 {
00214
00215
00216 mainmap_citer_t istart = _mainmap.begin();
00217 mainmap_citer_t iend = boundary_include ? _mainmap.upper_bound(res) : _mainmap.lower_bound(res);
00218
00219 while (istart != iend)
00220 {
00221
00222 mark_common_items(istart, x, min_filter, max_filter);
00223 ++istart;
00224 }
00225
00226
00227 for (TYPENAME C::sorted_container_data_t::iterator it = x._container.begin(); it != x._container.end();)
00228 {
00229 if (!*it)
00230 it = x._container.erase(it);
00231 else
00232 *it = false, ++it;
00233 }
00234
00235 return !x._container.empty();
00236 }
00237
00238
00239 template < class T, class C >
00240 bool
00241 varmap< T, C >::intersect_greater_resource(const main_map_key& res, C& x, const T& min_filter, const T& max_filter, bool boundary_include) const
00242 {
00243
00244 mainmap_citer_t ifind = _mainmap.end();
00245
00246 if (boundary_include)
00247 ifind = _mainmap.find(res);
00248
00249 if (ifind == _mainmap.end())
00250 ifind = _mainmap.upper_bound(res);
00251
00252 while (ifind != _mainmap.end())
00253 {
00254
00255 mark_common_items(ifind, x, min_filter, max_filter);
00256 ++ifind;
00257 }
00258
00259
00260 for (TYPENAME C::sorted_container_data_t::iterator it = x._container.begin(); it != x._container.end();)
00261 {
00262 if (!*it)
00263 it = x._container.erase(it);
00264 else
00265 *it = false, ++it;
00266 }
00267
00268 return !x._container.empty();
00269 }
00270
00271
00272 template < class T, class C >
00273 void
00274 varmap< T, C >::fetch_all(C& x) const
00275 {
00276 T min_filter(0, 0);
00277 T max_filter(~0, ~0);
00278
00279 for (mainmap_citer_t miter = _mainmap.begin(); miter != _mainmap.end(); ++miter)
00280 {
00281 add_unique_items(miter, x, min_filter, max_filter);
00282 }
00283 }
00284
00285 template < class T, class C >
00286 void
00287 varmap< T, C >::clear()
00288 {
00289 for (mainmap_citer_t miter = _mainmap.begin(); miter != _mainmap.end(); ++miter)
00290 {
00291 var_value val;
00292 if (_partial)
00293 {
00294 val._not_null = true;
00295 val._value.strVal = miter.key()._var_res._key._res;
00296 _factory.destroy(_type, val);
00297 val._value.bufVal = (const ub1_t*)miter.key()._var_res._key._offsets;
00298 if (val._value.bufVal)
00299 _factory.destroy(vt_numeric, val);
00300 }
00301 else
00302 {
00303 val = miter.key()._var_res._val;
00304 _factory.destroy(_type, val);
00305 }
00306 }
00307
00308 _mainmap.clear();
00309 _main_object_allocator.clear_all();
00310 if (_partial)
00311 _offsetmap.clear();
00312 }
00313
00314
00315 template < class T, class C >
00316 void
00317 varmap< T, C >::get_common_items(mainmap_citer_t c, const C& r, const T& min_filter, const T& max_filter, size_t& entries)
00318 {
00319 if (c->size() < r._container.size())
00320 {
00321 TYPENAME mainmap_object_t::const_iterator clower = c->lower_bound(min_filter);
00322 TYPENAME mainmap_object_t::const_iterator cupper = c->upper_bound(max_filter);
00323
00324 for (; clower != cupper; ++clower)
00325 {
00326 size_t ckey = r.clean_key(clower.key());
00327 if (r._container.end() != r._container.find(ckey))
00328 ++entries;
00329 }
00330 }
00331 else
00332 {
00333 for (TYPENAME C::sorted_container_data_t::const_iterator riter = r._container.begin(); riter != r._container.end(); ++riter)
00334 {
00335 T ckey = r.compound_key(riter.key(), min_filter);
00336 if (c->end() != c->find(ckey))
00337 ++entries;
00338 }
00339 }
00340 }
00341
00342
00343 template < class T, class C >
00344 bool
00345 varmap< T, C >::add_unique_items(mainmap_citer_t c, C& r, const T& min_filter, const T& max_filter)
00346 {
00347 bool ret = false;
00348
00349 TYPENAME mainmap_object_t::const_iterator clower = c->lower_bound(min_filter);
00350 TYPENAME mainmap_object_t::const_iterator cupper = c->upper_bound(max_filter);
00351
00352 for (; clower != cupper; ++clower)
00353 {
00354 size_t ckey = r.clean_key(clower.key());
00355 if (r._container.end() == r._container.find(ckey))
00356 {
00357 r._container.insert(r._allocator, ckey, false);
00358 ret = true;
00359 }
00360 }
00361
00362 return ret;
00363 }
00364
00365 template < class T, class C >
00366 size_t
00367 varmap< T, C >::get_main_object_allocator_pages() const
00368 {
00369 return _main_object_allocator.count();
00370 }
00371
00372 template < class T, class C >
00373 size_t
00374 varmap< T, C >::get_main_object_allocator_capacity() const
00375 {
00376 return _main_object_allocator.capacity();
00377 }
00378
00379 template < class T, class C >
00380 TYPENAME varmap< T, C >::mainmap_iter_t
00381 varmap< T, C >::add_resource(const main_map_key& mkey, const T& x)
00382 {
00383
00384 mainmap_iter_t ifind = _mainmap.find(mkey);
00385 if (ifind == _mainmap.end())
00386 {
00387 mainmap_object_t obj;
00388
00389
00390 if (_partial)
00391 {
00392 main_map_key clone_key(mkey._var_res._key._res, &_factory);
00393 ifind = _mainmap.insert(clone_key, obj).first;
00394 }
00395 else if (_fuzzy)
00396 {
00397 main_map_key clone_key(mkey._var_res._ngram._fuzzy_key, mkey._var_res._key._res, &_factory);
00398 ifind = _mainmap.insert(clone_key, obj).first;
00399 }
00400 else
00401 {
00402 main_map_key clone_key(_type, mkey._var_res._val, &_factory);
00403 ifind = _mainmap.insert(clone_key, obj).first;
00404 }
00405
00406 if (ifind == _mainmap.end())
00407 return ifind;
00408
00409
00410 ifind->insert(_main_object_allocator, x, false);
00411
00412 if (_partial)
00413 {
00414
00415 for (ub1_t i = 0; ifind.key()._var_res._key._offsets && i < *ifind.key()._var_res._key._offsets; ++i)
00416 {
00417 offset_map_key skey(ifind, i);
00418 _offsetmap.insert(skey, true);
00419 }
00420 }
00421 }
00422 else
00423 {
00424
00425 TYPENAME mainmap_object_t::const_iterator iter = ifind->find(x);
00426
00427 if (iter == ifind->end())
00428 ifind->insert(_main_object_allocator, x, false);
00429 }
00430
00431 return ifind;
00432 }
00433
00434
00435 template < class T, class C >
00436 bool
00437 varmap< T, C >::remove_resource(mainmap_iter_t ierase, const T& x)
00438 {
00439
00440 TYPENAME mainmap_object_t::iterator iter = ierase->find(x);
00441 if (iter == ierase->end())
00442 return false;
00443
00444
00445 ierase->erase(_main_object_allocator, iter);
00446
00447 if (!ierase->empty())
00448 return true;
00449
00450 var_value val;
00451
00452 if (_partial)
00453 {
00454
00455 for (ub1_t i = 0; ierase.key()._var_res._key._offsets && i < *ierase.key()._var_res._key._offsets; ++i)
00456 {
00457
00458 offset_map_key skey(ierase, i);
00459 offsetmap_iter_t iLower = _offsetmap.lower_bound(skey);
00460 offsetmap_iter_t iUpper = _offsetmap.upper_bound(skey);
00461
00462 if (iLower != _offsetmap.end())
00463 {
00464
00465 while (iLower != iUpper)
00466 {
00467 if (iLower.key()._iter == ierase)
00468 iLower = _offsetmap.erase(iLower);
00469 else
00470 ++iLower;
00471 }
00472 }
00473 }
00474
00475
00476 val._not_null = true;
00477 val._value.strVal = ierase.key()._var_res._key._res;
00478 if (val._value.strVal)
00479 _factory.destroy(_type, val);
00480
00481 val._value.bufVal = (const ub1_t*)ierase.key()._var_res._key._offsets;
00482 if (val._value.bufVal)
00483 _factory.destroy(vt_numeric, val);
00484 }
00485 else if (_fuzzy)
00486 {
00487
00488 val._not_null = true;
00489 val._value.strVal = ierase.key()._var_res._ngram._res;
00490 if (val._value.strVal)
00491 _factory.destroy(_type, val);
00492 }
00493 else
00494 {
00495 val = ierase.key()._var_res._val;
00496 _factory.destroy(_type, val);
00497 }
00498
00499 _mainmap.erase(ierase);
00500 return true;
00501 }
00502
00503 template < class T, class C >
00504 bool
00505 varmap< T, C >::find_partial_resource(const main_map_key& mkey, bool deep, C& x, const T& min_filter, const T& max_filter) const
00506 {
00507 bool ret = false;
00508 assert(_partial);
00509
00510 mainmap_citer_t bmfind = _mainmap.lower_bound(mkey);
00511 while (bmfind != _mainmap.end()
00512 && bmfind.key().partial_match(mkey)
00513 )
00514 {
00515
00516 ret |= add_unique_items(bmfind, x, min_filter, max_filter);
00517 ++bmfind;
00518 }
00519
00520 if (deep && !_offsetmap.empty())
00521 {
00522
00523 offsetmap_citer_t bsupper = _offsetmap.end();
00524 --bsupper;
00525
00526 size_t max_tokens = bsupper.key()._offset;
00527
00528
00529 for (size_t i = 0; deep && i <= max_tokens; ++i)
00530 {
00531 offset_map_key skey(mkey._var_res._key._res, i);
00532 offsetmap_citer_t bsfind = _offsetmap.lower_bound(skey);
00533 while (bsfind != _offsetmap.end()
00534 && bsfind.key()._offset == i
00535 && bsfind.key().partial_match(mkey)
00536 )
00537 {
00538
00539 ret |= add_unique_items(bsfind.key()._iter, x, min_filter, max_filter);
00540 ++bsfind;
00541 }
00542 }
00543 }
00544
00545 return ret;
00546 }
00547
00548 template < class T, class C >
00549 bool
00550 varmap< T, C >::intersect_partial_resource(const main_map_key& mkey, bool deep, C& x, const T& min_filter, const T& max_filter) const
00551 {
00552 TYPENAME C::sorted_container_data_t::iterator it = x._container.begin();
00553
00554
00555 mainmap_citer_t bmfind = _partial ? _mainmap.lower_bound(mkey) : _mainmap.find(mkey);
00556 while (bmfind != _mainmap.end()
00557 && (!_partial || _partial && bmfind.key().partial_match(mkey))
00558 )
00559 {
00560
00561 mark_common_items(bmfind, x, min_filter, max_filter);
00562 ++bmfind;
00563 }
00564
00565
00566 if (_partial && deep && !_offsetmap.empty())
00567 {
00568
00569 offsetmap_citer_t bsupper = _offsetmap.end();
00570 --bsupper;
00571
00572 size_t max_tokens = bsupper.key()._offset;
00573
00574
00575 for (size_t i = 0; deep && i <= max_tokens; ++i)
00576 {
00577 offset_map_key skey(mkey._var_res._key._res, i);
00578 offsetmap_citer_t bsfind = _offsetmap.lower_bound(skey);
00579 while (bsfind != _offsetmap.end()
00580 && bsfind.key()._offset == i
00581 && bsfind.key().partial_match(mkey)
00582 )
00583 {
00584
00585 mark_common_items(bmfind, x, min_filter, max_filter);
00586 ++bsfind;
00587 }
00588 }
00589 }
00590
00591
00592 for (it = x._container.begin(); it != x._container.end();)
00593 {
00594 if (!*it)
00595 it = x._container.erase(it);
00596 else
00597 *it = false, ++it;
00598 }
00599
00600 return !x._container.empty();
00601 }
00602
00603 template < class T, class C >
00604 bool
00605 varmap< T, C >::find_fuzzy_resource(const _list< size_t >& fuzzy_container, C& x, const T& min_filter, const T& max_filter) const
00606 {
00607 TYPENAME C::sorted_container_data_t::iterator it = x._container.begin();
00608
00609 bool ret = false;
00610 assert(_fuzzy);
00611
00612
00613 for (_list< size_t >::const_iterator iter_candidates = fuzzy_container.begin(); iter_candidates != fuzzy_container.end(); ++iter_candidates)
00614 {
00615 main_map_key mkey(*iter_candidates, 0, 0);
00616
00617 mainmap_citer_t mfind = _mainmap.lower_bound(mkey);
00618 if (mfind != _mainmap.end())
00619 {
00620
00621 ret |= add_unique_items(mfind, x, min_filter, max_filter);
00622 }
00623 }
00624
00625 return ret;
00626 }
00627
00628 template < class T, class C >
00629 bool
00630 varmap< T, C >::intersect_fuzzy_resource(const _list< size_t >& fuzzy_container, C& x, const T& min_filter, const T& max_filter) const
00631 {
00632 TYPENAME C::sorted_container_data_t::iterator it = x._container.begin();
00633
00634
00635 for (_list< size_t >::const_iterator iter_candidates = fuzzy_container.begin(); iter_candidates != fuzzy_container.end(); ++iter_candidates)
00636 {
00637 main_map_key mkey(*iter_candidates, 0, 0);
00638
00639 mainmap_citer_t mfind = _mainmap.lower_bound(mkey);
00640 if (mfind != _mainmap.end())
00641 {
00642
00643 mark_common_items(mfind, x, min_filter, max_filter);
00644 }
00645 }
00646
00647
00648 for (it = x._container.begin(); it != x._container.end();)
00649 {
00650 if (!*it)
00651 it = x._container.erase(it);
00652 else
00653 *it = false, ++it;
00654 }
00655
00656 return !x._container.empty();
00657 }
00658
00659
00660 template < class T, class C >
00661 void
00662 varmap< T, C >::mark_common_items(mainmap_citer_t c, C& r, const T& min_filter, const T& max_filter)
00663 {
00664
00665 if (c->size() < r._container.size())
00666 {
00667 TYPENAME mainmap_object_t::const_iterator clower = c->lower_bound(min_filter);
00668 TYPENAME mainmap_object_t::const_iterator cupper = c->upper_bound(max_filter);
00669
00670 for (; clower != cupper; ++clower)
00671 {
00672 TYPENAME C::sorted_container_data_t::iterator riter = r._container.find(r.clean_key(clower.key()));
00673 if (riter != r._container.end())
00674 *riter = true;
00675 }
00676 }
00677 else
00678 {
00679 for (TYPENAME C::sorted_container_data_t::iterator riter = r._container.begin(); riter != r._container.end(); ++riter)
00680 {
00681 T ckey = r.compound_key(riter.key(), min_filter);
00682 TYPENAME mainmap_object_t::const_iterator citer = c->find(ckey);
00683 if (citer != c->end())
00684 *riter = true;
00685 }
00686 }
00687 }
00688
00689
00690 template < class T, class C >
00691 void
00692 varmap< T, C >::remove_uncommon_items(mainmap_citer_t c, C& r, const T& min_filter, const T& max_filter)
00693 {
00694 for (TYPENAME C::sorted_container_data_t::iterator riter = r._container.begin(); riter != r._container.end();)
00695 {
00696 T ckey = r.compound_key(riter.key(), min_filter);
00697 TYPENAME mainmap_object_t::const_iterator citer = c->find(ckey);
00698 if (citer != c->end())
00699 ++riter;
00700 else
00701 riter = r._container.erase(riter);
00702 }
00703 }
00704
00705 #pragma pack()
00706 END_TERIMBER_NAMESPACE
00707
00708 #endif // _terimber_varmap_hpp_