00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #ifndef __TBB_partitioner_H
00022 #define __TBB_partitioner_H
00023
00024 #ifndef __TBB_INITIAL_CHUNKS
00025 #define __TBB_INITIAL_CHUNKS 2
00026 #endif
00027 #ifndef __TBB_RANGE_POOL_CAPACITY
00028 #define __TBB_RANGE_POOL_CAPACITY 8
00029 #endif
00030 #ifndef __TBB_INIT_DEPTH
00031 #define __TBB_INIT_DEPTH 5
00032 #endif
00033
00034 #include "task.h"
00035 #include "aligned_space.h"
00036 #include "atomic.h"
00037
00038 #if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
00039
00040 #pragma warning (push)
00041 #pragma warning (disable: 4244)
00042 #endif
00043
00044 namespace tbb {
00045
00046 class auto_partitioner;
00047 class simple_partitioner;
00048 class affinity_partitioner;
00049 namespace interface6 {
00050 namespace internal {
00051 class affinity_partition_type;
00052 }
00053 }
00054
00055 namespace internal {
00056 size_t __TBB_EXPORTED_FUNC get_initial_auto_partitioner_divisor();
00057
//! Defines entry point for affinity partitioner into the tbb run-time library.
/** Holds the affinity map (an array of affinity_id) that affinity_partitioner
    carries between loop executions. Allocation/deallocation is delegated to the
    exported resize() method. */
class affinity_partitioner_base_v3: no_copy {
    friend class tbb::affinity_partitioner;
    friend class tbb::interface6::internal::affinity_partition_type;
    //! Array of affinity_id; NULL when my_size is 0.
    affinity_id* my_array;
    //! Number of elements in my_array.
    size_t my_size;
    //! Zeros the fields.
    affinity_partitioner_base_v3() : my_array(NULL), my_size(0) {}
    //! Deallocates my_array via resize(0).
    ~affinity_partitioner_base_v3() {resize(0);}
    //! Resize my_array.
    /** Implemented in the library; presumably scales the map with factor and
        concurrency — body not visible here. */
    void __TBB_EXPORTED_METHOD resize( unsigned factor );
};
00075
//! Old-style partition procedure hooks used by the backward-compatible algorithm code.
/** All methods are neutral defaults; derived partition types override what they need. */
class partition_type_base {
public:
    //! No-op affinity hooks by default.
    void set_affinity( task & ) {}
    void note_affinity( task::affinity_id ) {}
    //! No continuation task by default.
    task* continue_after_execute_range() {return NULL;}
    //! Never delay by default.
    bool decide_whether_to_delay() {return false;}
    //! Default policy: always spawn immediately, ignoring the delay flag.
    void spawn_or_delay( bool, task& b ) {
        task::spawn(b);
    }
};
00087
00088 template<typename Range, typename Body, typename Partitioner> class start_scan;
00089
00090 }
00092
00093 namespace serial {
00094 namespace interface6 {
00095 template<typename Range, typename Body, typename Partitioner> class start_for;
00096 }
00097 }
00098
00099 namespace interface6 {
00101 namespace internal {
00102 using namespace tbb::internal;
00103 template<typename Range, typename Body, typename Partitioner> class start_for;
00104 template<typename Range, typename Body, typename Partitioner> class start_reduce;
00105
//! Task whose shared flag records that one of its children was stolen.
/** Used as the continuation/parent of split work so siblings can observe theft
    via is_peer_stolen(). */
class flag_task: public task {
public:
    //! Set to true when a child (or peer) task reports being stolen.
    tbb::atomic<bool> my_child_stolen;
    flag_task() { my_child_stolen = false; }
    //! Continuation does no work of its own.
    task* execute() { return NULL; }
    //! Raise the stolen flag on t's parent.
    /** NOTE(review): unchecked static_cast — assumes t.parent() really is a
        flag_task (asserted elsewhere with dynamic_cast under TBB_USE_EXCEPTIONS). */
    static void mark_task_stolen(task &t) {
        tbb::atomic<bool> &flag = static_cast<flag_task*>(t.parent())->my_child_stolen;
#if TBB_USE_THREADING_TOOLS
        // Race-detection tools only recognize annotated atomic stores;
        // use an explicit release store instead of plain assignment.
        flag.fetch_and_store<release>(true);
#else
        flag = true;
#endif //TBB_USE_THREADING_TOOLS
    }
    //! True if a sibling of t has reported theft on the shared parent flag.
    static bool is_peer_stolen(task &t) {
        return static_cast<flag_task*>(t.parent())->my_child_stolen;
    }
};
00125
00127 class signal_task: public task {
00128 public:
00129 task* execute() {
00130 if( is_stolen_task() ) {
00131 flag_task::mark_task_stolen(*this);
00132 }
00133 return NULL;
00134 }
00135 };
00136
00140 typedef unsigned char depth_t;
00141
//! Fixed-capacity pool of ranges stored as a circular buffer with per-slot split depths.
/** my_head indexes the newest (deepest) range — the "back"; my_tail indexes the
    oldest (shallowest) — the "front". Elements live in raw aligned storage and
    are constructed/destroyed in place with placement new / explicit ~T(). */
template <typename T, depth_t MaxCapacity>
class range_vector {
    depth_t my_head;   // slot of the newest range (back of the pool)
    depth_t my_tail;   // slot of the oldest range (front of the pool)
    depth_t my_size;   // number of live ranges currently in the pool
    depth_t my_depth[MaxCapacity]; // split depth of each slot, parallel to my_pool
    tbb::aligned_space<T, MaxCapacity> my_pool; // raw storage, no heap allocation

public:
    //! Initialize the pool with a single range at depth 0.
    range_vector(const T& elem) : my_head(0), my_tail(0), my_size(1) {
        my_depth[0] = 0;
        new( my_pool.begin() ) T(elem);
    }
    ~range_vector() {
        // Destroy any ranges still alive in the pool.
        while( !empty() ) pop_back();
    }
    bool empty() const { return my_size == 0; }
    depth_t size() const { return my_size; }
    //! Repeatedly split the back range until the pool is full, the range stops
    //! being divisible, or its depth reaches max_depth.
    void split_to_fill(depth_t max_depth) {
        while( my_size < MaxCapacity && my_depth[my_head] < max_depth
          && my_pool.begin()[my_head].is_divisible() ) {
            depth_t prev = my_head;
            my_head = (my_head + 1) % MaxCapacity;
            // Copy the back range into the next slot, then rebuild the old slot
            // as the split-off portion (copy + destroy + split-construct, since
            // Range types need not be assignable).
            new(my_pool.begin()+my_head) T(my_pool.begin()[prev]); // copy
            my_pool.begin()[prev].~T();
            new(my_pool.begin()+prev) T(my_pool.begin()[my_head], split());
            my_depth[my_head] = ++my_depth[prev]; // both halves are one level deeper
            my_size++;
        }
    }
    //! Destroy the newest range and step my_head back (with wraparound).
    void pop_back() {
        __TBB_ASSERT(my_size > 0, "range_vector::pop_back() with empty size");
        my_pool.begin()[my_head].~T();
        my_size--;
        my_head = (my_head + MaxCapacity - 1) % MaxCapacity;
    }
    //! Destroy the oldest range and advance my_tail (with wraparound).
    void pop_front() {
        __TBB_ASSERT(my_size > 0, "range_vector::pop_front() with empty size");
        my_pool.begin()[my_tail].~T();
        my_size--;
        my_tail = (my_tail + 1) % MaxCapacity;
    }
    //! Newest (deepest) range.
    T& back() {
        __TBB_ASSERT(my_size > 0, "range_vector::back() with empty size");
        return my_pool.begin()[my_head];
    }
    //! Oldest (shallowest) range.
    T& front() {
        __TBB_ASSERT(my_size > 0, "range_vector::front() with empty size");
        return my_pool.begin()[my_tail];
    }
    //! Split depth of the oldest range.
    depth_t front_depth() {
        __TBB_ASSERT(my_size > 0, "range_vector::front_depth() with empty size");
        return my_depth[my_tail];
    }
};
00202
//! Generic divide-and-run engine parameterized (CRTP) by a partition policy.
/** The derived Partition class customizes the policy hooks below; execute()
    implements the common algorithm in terms of them. */
template <typename Partition>
struct partition_type_base {
    // Policy hooks with neutral defaults; derived partition types override as needed.
    void set_affinity( task & ) {}
    void note_affinity( task::affinity_id ) {}
    bool check_being_stolen(task &) { return false; }
    bool check_for_demand(task &) { return false; }
    bool divisions_left() { return true; }
    bool should_create_trap() { return false; }
    depth_t max_depth() { return 0; }
    void align_depth(depth_t) { }
    //! CRTP downcast to the concrete partition type.
    Partition& derived() { return *static_cast<Partition*>(this); }
    //! Split off the right half of start's range into a new sibling task and spawn it.
    /** Returns the flag_task continuation that both halves now report to
        (ref_count set to 2: left continues in place, right is spawned). */
    template<typename StartType>
    flag_task* split_work(StartType &start) {
        flag_task* parent_ptr = start.create_continuation(); // flag_task so theft is observable
        start.set_parent(parent_ptr);
        parent_ptr->set_ref_count(2);
        StartType& right_work = *new( parent_ptr->allocate_child() ) StartType(start, split());
        start.spawn(right_work);
        return parent_ptr;
    }
    //! Drive the partitioning algorithm for one task.
    /** In outline:
        [1] While the range is divisible and the policy grants divisions,
            split off sibling tasks (split_work).
        [2] If the policy asks for a trap, spawn a signal_task child so theft
            of the remaining work is recorded on the parent flag_task.
        [3] With no depth budget, run the body directly; otherwise process
            ranges through a bounded local pool (range_vector), offloading the
            shallowest range to a new task whenever demand is detected. */
    template<typename StartType, typename Range>
    void execute(StartType &start, Range &range) {
        task* parent_ptr = start.parent();
        if( range.is_divisible() ) {
            if( derived().divisions_left() )
                do parent_ptr = split_work(start); // split until out of budget or indivisible
                while( range.is_divisible() && derived().divisions_left() );
            if( derived().should_create_trap() ) {
                if( parent_ptr->ref_count() > 1 ) { // can't reuse a shared parent: make a fresh continuation
                    parent_ptr = start.create_continuation();
                    start.set_parent(parent_ptr);
                } else __TBB_ASSERT(parent_ptr->ref_count() == 1, NULL);
                parent_ptr->set_ref_count(2); // safe: we hold the only reference at this point
                signal_task& right_signal = *new( parent_ptr->allocate_child() ) signal_task();
                start.spawn(right_signal); // pure signal — does no work, only reports theft
            }
        }
        if( !range.is_divisible() || !derived().max_depth() )
            start.run_body( range ); // no pool: run the body directly (simple partitioner path)
        else { // range pool processing
            internal::range_vector<Range, Partition::range_pool_size> range_pool(range);
            do {
                range_pool.split_to_fill(derived().max_depth()); // refill the pool by splitting
                if( derived().check_for_demand( start ) ) {
                    if( range_pool.size() > 1 ) {
                        // Demand detected and spare ranges exist: offload the
                        // shallowest range (and its depth) to a fresh sibling task.
                        parent_ptr = start.create_continuation();
                        start.set_parent(parent_ptr);
                        parent_ptr->set_ref_count(2);
                        StartType& right_work = *new( parent_ptr->allocate_child() ) StartType(start, range_pool.front(), range_pool.front_depth());
                        start.spawn(right_work);
                        range_pool.pop_front();
                        continue;
                    }
                    // Only one range left but it can still be split: retry the
                    // fill so the next iteration can fork a task.
                    if( range_pool.back().is_divisible() )
                        continue;
                }
                start.run_body( range_pool.back() );
                range_pool.pop_back();
            } while( !range_pool.empty() && !start.is_cancelled() );
        }
    }
};
00275
//! Default heuristics of auto partitioning.
/** Maintains two budgets: my_divisor bounds how many times this object may
    divide the work into new tasks, and my_max_depth bounds splitting inside
    the local range pool. */
template <typename Partition>
struct auto_partition_type_base : partition_type_base<Partition> {
    size_t my_divisor;    // remaining task-division budget
    depth_t my_max_depth; // depth budget for range-pool splitting
    auto_partition_type_base() : my_max_depth(__TBB_INIT_DEPTH) {
        // Initial budget scales with the library-provided divisor, times __TBB_INITIAL_CHUNKS/4.
        my_divisor = tbb::internal::get_initial_auto_partitioner_divisor()*__TBB_INITIAL_CHUNKS/4;
        __TBB_ASSERT(my_divisor, "initial value of get_initial_auto_partitioner_divisor() is not valid");
    }
    //! Splitting constructor: divide the budget between source and this half.
    auto_partition_type_base(auto_partition_type_base &src, split) {
        my_max_depth = src.my_max_depth;
#if __TBB_INITIAL_TASK_IMBALANCE
        if( src.my_divisor <= 1 ) my_divisor = 0;
        else my_divisor = src.my_divisor = (src.my_divisor+1u) / 2u;
#else
        // Source keeps the (rounded-up) larger share; if the shares are
        // unequal, extend the source's depth budget to compensate.
        my_divisor = src.my_divisor / 2u;
        src.my_divisor = src.my_divisor - my_divisor;
        if(my_divisor) src.my_max_depth += static_cast<depth_t>(__TBB_Log2(src.my_divisor/my_divisor));
#endif
    }
    //! Once the division budget is exhausted, detect whether this task was stolen.
    /** On theft: record it on the parent flag_task and grant one extra depth level. */
    bool check_being_stolen( task &t) {
        if( !my_divisor ) {
            my_divisor = 1; // NOTE(review): also reused as a "theft already checked" flag
            if( t.is_stolen_task() ) {
#if TBB_USE_EXCEPTIONS
                // RTTI sanity check: the parent is expected to be a flag_task.
                __TBB_ASSERT(dynamic_cast<flag_task*>(t.parent()), 0);
#endif
                flag_task::mark_task_stolen(t);
                my_max_depth++;
                return true;
            }
        }
        return false;
    }
    //! True while the budget still allows forking a task.
    bool divisions_left() {
        if( my_divisor > 1 ) return true;
        if( my_divisor && my_max_depth > 1 ) {
            // Last unit of budget: trade one level of pool depth for a final division.
            my_max_depth--;
            my_divisor = 0;
            return true;
        } else return false;
    }
    //! Install a stealing trap only while some budget remains.
    bool should_create_trap() {
        return my_divisor > 0;
    }
    //! A stolen peer means idle threads exist: allow deeper splitting and offload work.
    bool check_for_demand(task &t) {
        if( flag_task::is_peer_stolen(t) ) {
            my_max_depth++;
            return true;
        } else return false;
    }
    //! Make the depth budget relative to the given base depth.
    void align_depth(depth_t base) {
        __TBB_ASSERT(base <= my_max_depth, 0);
        my_max_depth -= base;
    }
    depth_t max_depth() { return my_max_depth; }
};
00338
//! Provides the partitioning heuristics of affinity_partitioner.
/** Carries a window [map_begin, map_end) into the affinity_id array owned by
    affinity_partitioner_base_v3. Splits hand the upper part of the window to
    the new task, so ranges can be re-offered (set_affinity) to the threads
    that executed them before (note_affinity). */
class affinity_partition_type : public auto_partition_type_base<affinity_partition_type> {
    static const unsigned factor_power = 4;
    static const unsigned factor = 1<<factor_power; // affinity-map granularity (16 slots)
    bool my_delay; // when set, skip the very first demand check
    unsigned map_begin, map_end, map_mid; // current window into my_array and its split point
    tbb::internal::affinity_id* my_array; // shared affinity map (owned by the partitioner object)
    //! Choose the split point so the upper part is a multiple of factor (when large enough).
    void set_mid() {
        unsigned d = (map_end - map_begin)/2; // cannot fall outside the window
        if( d > factor )
            d &= 0u-factor; // round down to a multiple of factor (factor is a power of two)
        map_mid = map_end - d;
    }
public:
    affinity_partition_type( tbb::internal::affinity_partitioner_base_v3& ap ) {
        __TBB_ASSERT( (factor&(factor-1))==0, "factor must be power of two" );
        ap.resize(factor); // (re)allocate the affinity map; sizing is up to resize()
        my_array = ap.my_array;
        map_begin = 0;
        map_end = unsigned(ap.my_size);
        set_mid();
        my_delay = true;
        my_divisor /= __TBB_INITIAL_CHUNKS; // undo the initial-chunks scaling from the base ctor
        my_max_depth = factor_power+1; // enough depth to reach single-slot granularity
        __TBB_ASSERT( my_max_depth < __TBB_RANGE_POOL_CAPACITY, 0 );
    }
    //! Splitting constructor: this half takes the upper window [map_mid, map_end).
    affinity_partition_type(affinity_partition_type& p, split)
        : auto_partition_type_base<affinity_partition_type>(p, split()), my_array(p.my_array) {
        __TBB_ASSERT( p.map_end-p.map_begin<factor || (p.map_end-p.map_begin)%factor==0, NULL );
        map_end = p.map_end;
        map_begin = p.map_end = p.map_mid;
        set_mid(); p.set_mid();
        my_delay = p.my_delay;
    }
    //! Offer the task to the thread previously recorded for the window's first slot.
    void set_affinity( task &t ) {
        if( map_begin<map_end )
            t.set_affinity( my_array[map_begin] );
    }
    //! Remember which thread actually executed this window's first slot.
    void note_affinity( task::affinity_id id ) {
        if( map_begin<map_end )
            my_array[map_begin] = id;
    }
    //! Demand exists while dedicated map slots remain, or when a peer was stolen.
    bool check_for_demand( task &t ) {
        if( !my_delay ) {
            if( map_mid<map_end ) {
                __TBB_ASSERT(my_max_depth>__TBB_Log2(map_end-map_mid), 0);
                return true; // keep offloading until the window's upper part is handed out
            }
            if( flag_task::is_peer_stolen(t) ) {
                my_max_depth++;
                return true;
            }
        } else my_delay = false; // first call only resets the delay
        return false;
    }
    bool divisions_left() {
        return my_divisor > 1;
    }
    //! Affinity partitioning always installs a stealing trap.
    bool should_create_trap() {
        return true;
    }
    static const unsigned range_pool_size = __TBB_RANGE_POOL_CAPACITY;
};
00402
//! Concrete partition type of auto_partitioner: the base heuristics unchanged.
class auto_partition_type: public auto_partition_type_base<auto_partition_type> {
public:
    auto_partition_type( const auto_partitioner& ) {}
    auto_partition_type( auto_partition_type& src, split)
        : auto_partition_type_base<auto_partition_type>(src, split()) {}
    static const unsigned range_pool_size = __TBB_RANGE_POOL_CAPACITY;
};
00410
00411 class simple_partition_type: public partition_type_base<simple_partition_type> {
00412 public:
00413 simple_partition_type( const simple_partitioner& ) {}
00414 simple_partition_type( const simple_partition_type&, split ) {}
00416 template<typename StartType, typename Range>
00417 void execute(StartType &start, Range &range) {
00418 while( range.is_divisible() )
00419 split_work( start );
00420 start.run_body( range );
00421 }
00422
00423 };
00424
00426 class old_auto_partition_type: public tbb::internal::partition_type_base {
00427 size_t num_chunks;
00428 static const size_t VICTIM_CHUNKS = 4;
00429 public:
00430 bool should_execute_range(const task &t) {
00431 if( num_chunks<VICTIM_CHUNKS && t.is_stolen_task() )
00432 num_chunks = VICTIM_CHUNKS;
00433 return num_chunks==1;
00434 }
00435 old_auto_partition_type( const auto_partitioner& )
00436 : num_chunks(internal::get_initial_auto_partitioner_divisor()*__TBB_INITIAL_CHUNKS/4) {}
00437 old_auto_partition_type( const affinity_partitioner& )
00438 : num_chunks(internal::get_initial_auto_partitioner_divisor()*__TBB_INITIAL_CHUNKS/4) {}
00439 old_auto_partition_type( old_auto_partition_type& pt, split ) {
00440 num_chunks = pt.num_chunks = (pt.num_chunks+1u) / 2u;
00441 }
00442 };
00443
00444 }
00446 }
00447
00449
//! A simple partitioner.
/** Divides the range until it can no longer be divided
    (see interface6::internal::simple_partition_type::execute). */
class simple_partitioner {
public:
    simple_partitioner() {}
private:
    // Algorithm classes that are allowed to read the private partition types.
    template<typename Range, typename Body, typename Partitioner> friend class serial::interface6::start_for;
    template<typename Range, typename Body, typename Partitioner> friend class interface6::internal::start_for;
    template<typename Range, typename Body, typename Partitioner> friend class interface6::internal::start_reduce;
    template<typename Range, typename Body, typename Partitioner> friend class internal::start_scan;

    //! Old-style partition type, kept for backward compatibility
    //! (still used via the start_scan friendship, presumably — verify against callers).
    class partition_type: public internal::partition_type_base {
    public:
        bool should_execute_range(const task& ) {return false;}
        partition_type( const simple_partitioner& ) {}
        partition_type( const partition_type&, split ) {}
    };

    //! New implementation, selected by the interface6 algorithm classes.
    typedef interface6::internal::simple_partition_type task_partition_type;
};
00469
00471
//! An auto partitioner.
/** Divides the range using the budgeted heuristics of
    interface6::internal::auto_partition_type (task-division and depth budgets,
    extended when tasks are stolen). */
class auto_partitioner {
public:
    auto_partitioner() {}

private:
    // Algorithm classes that are allowed to read the private partition types.
    template<typename Range, typename Body, typename Partitioner> friend class serial::interface6::start_for;
    template<typename Range, typename Body, typename Partitioner> friend class interface6::internal::start_for;
    template<typename Range, typename Body, typename Partitioner> friend class interface6::internal::start_reduce;
    template<typename Range, typename Body, typename Partitioner> friend class internal::start_scan;

    //! Old-style partition type, kept for backward compatibility.
    typedef interface6::internal::old_auto_partition_type partition_type;
    //! New implementation, selected by the interface6 algorithm classes.
    typedef interface6::internal::auto_partition_type task_partition_type;
};
00488
//! An affinity partitioner.
/** Like auto_partitioner, but additionally records which thread executed each
    slot of the inherited affinity map (affinity_partitioner_base_v3) and
    replays those affinities on later executions — see
    interface6::internal::affinity_partition_type. */
class affinity_partitioner: internal::affinity_partitioner_base_v3 {
public:
    affinity_partitioner() {}

private:
    // Algorithm classes that are allowed to read the private partition types.
    template<typename Range, typename Body, typename Partitioner> friend class serial::interface6::start_for;
    template<typename Range, typename Body, typename Partitioner> friend class interface6::internal::start_for;
    template<typename Range, typename Body, typename Partitioner> friend class interface6::internal::start_reduce;
    template<typename Range, typename Body, typename Partitioner> friend class internal::start_scan;

    //! Old-style partition type, kept for backward compatibility.
    typedef interface6::internal::old_auto_partition_type partition_type;
    //! New implementation, selected by the interface6 algorithm classes.
    typedef interface6::internal::affinity_partition_type task_partition_type;
};
00504
00505 }
00506
00507 #if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
00508 #pragma warning (pop)
00509 #endif // warning 4244 is back
00510 #undef __TBB_INITIAL_CHUNKS
00511 #undef __TBB_RANGE_POOL_CAPACITY
00512 #undef __TBB_INIT_DEPTH
00513 #endif