partitioner.h

/*
    Copyright 2005-2012 Intel Corporation.  All Rights Reserved.

    The source code contained or described herein and all documents related
    to the source code ("Material") are owned by Intel Corporation or its
    suppliers or licensors.  Title to the Material remains with Intel
    Corporation or its suppliers and licensors.  The Material is protected
    by worldwide copyright laws and treaty provisions.  No part of the
    Material may be used, copied, reproduced, modified, published, uploaded,
    posted, transmitted, distributed, or disclosed in any way without
    Intel's prior express written permission.

    No license under any patent, copyright, trade secret or other
    intellectual property right is granted to or conferred upon you by
    disclosure or delivery of the Materials, either expressly, by
    implication, inducement, estoppel or otherwise.  Any license under such
    intellectual property rights must be express and approved by Intel in
    writing.
*/

#ifndef __TBB_partitioner_H
#define __TBB_partitioner_H

#ifndef __TBB_INITIAL_CHUNKS
#define __TBB_INITIAL_CHUNKS 2
#endif
#ifndef __TBB_RANGE_POOL_CAPACITY
#define __TBB_RANGE_POOL_CAPACITY 8
#endif
#ifndef __TBB_INIT_DEPTH
#define __TBB_INIT_DEPTH 5
#endif

#include "task.h"
#include "aligned_space.h"
#include "atomic.h"

#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
    // Workaround for overzealous compiler warnings
    #pragma warning (push)
    #pragma warning (disable: 4244)
#endif

namespace tbb {

class auto_partitioner;
class simple_partitioner;
class affinity_partitioner;
namespace interface6 {
    namespace internal {
        class affinity_partition_type;
    }
}

namespace internal {
size_t __TBB_EXPORTED_FUNC get_initial_auto_partitioner_divisor();

class affinity_partitioner_base_v3: no_copy {
    friend class tbb::affinity_partitioner;
    friend class tbb::interface6::internal::affinity_partition_type;

    affinity_id* my_array;
    size_t my_size;
    affinity_partitioner_base_v3() : my_array(NULL), my_size(0) {}
    ~affinity_partitioner_base_v3() {resize(0);}

    void __TBB_EXPORTED_METHOD resize( unsigned factor );
};

class partition_type_base {
public:
    void set_affinity( task & ) {}
    void note_affinity( task::affinity_id ) {}
    task* continue_after_execute_range() {return NULL;}
    bool decide_whether_to_delay() {return false;}
    void spawn_or_delay( bool, task& b ) {
        task::spawn(b);
    }
};

template<typename Range, typename Body, typename Partitioner> class start_scan;

} // namespace internal

namespace serial {
namespace interface6 {
template<typename Range, typename Body, typename Partitioner> class start_for;
}
}

namespace interface6 {
namespace internal {
using namespace tbb::internal;
template<typename Range, typename Body, typename Partitioner> class start_for;
template<typename Range, typename Body, typename Partitioner> class start_reduce;

class flag_task: public task {
public:
    tbb::atomic<bool> my_child_stolen;
    flag_task() { my_child_stolen = false; }
    task* execute() { return NULL; }
    static void mark_task_stolen(task &t) {
        tbb::atomic<bool> &flag = static_cast<flag_task*>(t.parent())->my_child_stolen;
#if TBB_USE_THREADING_TOOLS
        // Threading tools respect the lock prefix but report a false-positive data race on a plain store
        flag.fetch_and_store<release>(true);
#else
        flag = true;
#endif //TBB_USE_THREADING_TOOLS
    }
    static bool is_peer_stolen(task &t) {
        return static_cast<flag_task*>(t.parent())->my_child_stolen;
    }
};

class signal_task: public task {
public:
    task* execute() {
        if( is_stolen_task() ) {
            flag_task::mark_task_stolen(*this);
        }
        return NULL;
    }
};

typedef unsigned char depth_t;

template <typename T, depth_t MaxCapacity>
class range_vector {
    depth_t my_head;
    depth_t my_tail;
    depth_t my_size;
    depth_t my_depth[MaxCapacity]; // relative depths of stored ranges
    tbb::aligned_space<T, MaxCapacity> my_pool;

public:
    range_vector(const T& elem) : my_head(0), my_tail(0), my_size(1) {
        my_depth[0] = 0;
        new( my_pool.begin() ) T(elem); // TODO: std::move?
    }
    ~range_vector() {
        while( !empty() ) pop_back();
    }
    bool empty() const { return my_size == 0; }
    depth_t size() const { return my_size; }
    void split_to_fill(depth_t max_depth) {
        while( my_size < MaxCapacity && my_depth[my_head] < max_depth
          && my_pool.begin()[my_head].is_divisible() ) {
            depth_t prev = my_head;
            my_head = (my_head + 1) % MaxCapacity;
            new(my_pool.begin()+my_head) T(my_pool.begin()[prev]); // copy TODO: std::move?
            my_pool.begin()[prev].~T(); // instead of assignment
            new(my_pool.begin()+prev) T(my_pool.begin()[my_head], split()); // do 'inverse' split
            my_depth[my_head] = ++my_depth[prev];
            my_size++;
        }
    }
    void pop_back() {
        __TBB_ASSERT(my_size > 0, "range_vector::pop_back() with empty size");
        my_pool.begin()[my_head].~T();
        my_size--;
        my_head = (my_head + MaxCapacity - 1) % MaxCapacity;
    }
    void pop_front() {
        __TBB_ASSERT(my_size > 0, "range_vector::pop_front() with empty size");
        my_pool.begin()[my_tail].~T();
        my_size--;
        my_tail = (my_tail + 1) % MaxCapacity;
    }
    T& back() {
        __TBB_ASSERT(my_size > 0, "range_vector::back() with empty size");
        return my_pool.begin()[my_head];
    }
    T& front() {
        __TBB_ASSERT(my_size > 0, "range_vector::front() with empty size");
        return my_pool.begin()[my_tail];
    }
    depth_t front_depth() {
        __TBB_ASSERT(my_size > 0, "range_vector::front_depth() with empty size");
        return my_depth[my_tail];
    }
};
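
// Illustrative sketch (not part of the original header), assuming a divisible
// Range type such as tbb::blocked_range<int> from tbb/blocked_range.h:
// split_to_fill() keeps splitting the deepest stored range until the pool is
// full, the depth limit is reached, or the range is no longer divisible;
// back()/pop_back() then drain the deepest (smallest) pieces first.
//
//     range_vector<tbb::blocked_range<int>, 8> pool( tbb::blocked_range<int>(0, 1024) );
//     pool.split_to_fill( 4 );          // split until relative depth 4 or the pool is full
//     while( !pool.empty() ) {
//         consume( pool.back() );       // consume() is a hypothetical callback
//         pool.pop_back();
//     }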

template <typename Partition>
struct partition_type_base {
    // decision makers
    void set_affinity( task & ) {}
    void note_affinity( task::affinity_id ) {}
    bool check_being_stolen(task &) { return false; } // part of old should_execute_range()
    bool check_for_demand(task &) { return false; }
    bool divisions_left() { return true; } // part of old should_execute_range()
    bool should_create_trap() { return false; }
    depth_t max_depth() { return 0; }
    void align_depth(depth_t) { }
    // common function blocks
    Partition& derived() { return *static_cast<Partition*>(this); }
    template<typename StartType>
    flag_task* split_work(StartType &start) {
        flag_task* parent_ptr = start.create_continuation(); // the type here is to express expectation
        start.set_parent(parent_ptr);
        parent_ptr->set_ref_count(2);
        StartType& right_work = *new( parent_ptr->allocate_child() ) StartType(start, split());
        start.spawn(right_work);
        return parent_ptr;
    }
    template<typename StartType, typename Range>
    void execute(StartType &start, Range &range) {
        // The algorithm in a few words ([] denotes calls to decision methods of the partitioner):
        // [If this task is stolen, adjust depth and divisions if necessary, set flag].
        // If range is divisible {
        //    Spread the work while [initial divisions left];
        //    Create trap task [if necessary];
        // }
        // If not divisible or [max depth is reached], execute, else do the range pool part
        task* parent_ptr = start.parent();
        if( range.is_divisible() ) {
            if( derived().divisions_left() )
                do parent_ptr = split_work(start); // split until divisions_left()
                while( range.is_divisible() && derived().divisions_left() );
            if( derived().should_create_trap() ) { // only for range pool
                if( parent_ptr->ref_count() > 1 ) { // create a new parent if necessary
                    parent_ptr = start.create_continuation();
                    start.set_parent(parent_ptr);
                } else __TBB_ASSERT(parent_ptr->ref_count() == 1, NULL);
                parent_ptr->set_ref_count(2); // safe because the parent has only one reference
                signal_task& right_signal = *new( parent_ptr->allocate_child() ) signal_task();
                start.spawn(right_signal); // a pure signal task avoids deep recursion at the end
            }
        }
        if( !range.is_divisible() || !derived().max_depth() )
            start.run_body( range ); // the simple partitioner always goes here
        else { // do range pool
            internal::range_vector<Range, Partition::range_pool_size> range_pool(range);
            do {
                range_pool.split_to_fill(derived().max_depth()); // fill the range pool
                if( derived().check_for_demand( start ) ) {
                    if( range_pool.size() > 1 ) {
                        parent_ptr = start.create_continuation();
                        start.set_parent(parent_ptr);
                        parent_ptr->set_ref_count(2);
                        StartType& right_work = *new( parent_ptr->allocate_child() ) StartType(start, range_pool.front(), range_pool.front_depth());
                        start.spawn(right_work);
                        range_pool.pop_front();
                        continue;
                    }
                    if( range_pool.back().is_divisible() ) // there was not enough depth to fork a task
                        continue; // note: check_for_demand() should guarantee increasing max_depth() next time
                }
                start.run_body( range_pool.back() );
                range_pool.pop_back();
            } while( !range_pool.empty() && !start.is_cancelled() );
        }
    }
};
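
// Illustrative note (not part of the original header): the calls above imply the
// approximate interface a StartType task must provide in order to cooperate with
// split_work() and execute(); the exact signatures live in the start_for /
// start_reduce implementations, so treat the following as a sketch only:
//
//     flag_task* create_continuation();                 // allocate the continuation (new parent) task
//     void       set_parent( task* p );                 // re-parent this task under the continuation
//     void       spawn( task& sibling );                // spawn the freshly created right task
//     void       run_body( Range& r );                  // apply the user body to a (sub)range
//     bool       is_cancelled();                        // query cancellation of the task group
//     StartType( StartType& src, split );               // splitting constructor (takes the right half)
//     StartType( StartType& src, Range& r, depth_t d ); // construct from a pooled subrange at depth d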

template <typename Partition>
struct auto_partition_type_base : partition_type_base<Partition> {
    size_t my_divisor;
    depth_t my_max_depth;
    auto_partition_type_base() : my_max_depth(__TBB_INIT_DEPTH) {
        my_divisor = tbb::internal::get_initial_auto_partitioner_divisor()*__TBB_INITIAL_CHUNKS/4;
        __TBB_ASSERT(my_divisor, "initial value of get_initial_auto_partitioner_divisor() is not valid");
    }
    auto_partition_type_base(auto_partition_type_base &src, split) {
        my_max_depth = src.my_max_depth;
#if __TBB_INITIAL_TASK_IMBALANCE
        if( src.my_divisor <= 1 ) my_divisor = 0;
        else my_divisor = src.my_divisor = (src.my_divisor+1u) / 2u;
#else
        my_divisor = src.my_divisor / 2u;
        src.my_divisor = src.my_divisor - my_divisor; // TODO: check the effect separately
        if(my_divisor) src.my_max_depth += static_cast<depth_t>(__TBB_Log2(src.my_divisor/my_divisor));
#endif
    }
    bool check_being_stolen( task &t) { // part of old should_execute_range()
        if( !my_divisor ) { // if not from the top P tasks of binary tree
            my_divisor = 1; // TODO: replace by on-stack flag (partition_state's member)?
            if( t.is_stolen_task() ) {
#if TBB_USE_EXCEPTIONS
                // RTTI is available, check whether the cast is valid
                __TBB_ASSERT(dynamic_cast<flag_task*>(t.parent()), 0);
                // correctness of the cast relies on avoiding the root task for which:
                // - initial value of my_divisor != 0 (protected by separate assertion)
                // - is_stolen_task() always returns false for the root task.
#endif
                flag_task::mark_task_stolen(t);
                my_max_depth++;
                return true;
            }
        }
        return false;
    }
    bool divisions_left() { // part of old should_execute_range()
        if( my_divisor > 1 ) return true;
        if( my_divisor && my_max_depth > 1 ) { // can split the task and once more internally. TODO: on-stack flag instead
            // keep same fragmentation while splitting for the local task pool
            my_max_depth--;
            my_divisor = 0; // decrease max_depth once per task
            return true;
        } else return false;
    }
    bool should_create_trap() {
        return my_divisor > 0;
    }
    bool check_for_demand(task &t) {
        if( flag_task::is_peer_stolen(t) ) {
            my_max_depth++;
            return true;
        } else return false;
    }
    void align_depth(depth_t base) {
        __TBB_ASSERT(base <= my_max_depth, 0);
        my_max_depth -= base;
    }
    depth_t max_depth() { return my_max_depth; }
};

class affinity_partition_type : public auto_partition_type_base<affinity_partition_type> {
    static const unsigned factor_power = 4;
    static const unsigned factor = 1<<factor_power;
    bool my_delay;
    unsigned map_begin, map_end, map_mid;
    tbb::internal::affinity_id* my_array;
    void set_mid() {
        unsigned d = (map_end - map_begin)/2; // we could add 1 but it is rather for LIFO affinity
        if( d > factor )
            d &= 0u-factor;
        map_mid = map_end - d;
    }
public:
    affinity_partition_type( tbb::internal::affinity_partitioner_base_v3& ap ) {
        __TBB_ASSERT( (factor&(factor-1))==0, "factor must be power of two" );
        ap.resize(factor);
        my_array = ap.my_array;
        map_begin = 0;
        map_end = unsigned(ap.my_size);
        set_mid();
        my_delay = true;
        my_divisor /= __TBB_INITIAL_CHUNKS; // let exactly P tasks be distributed across workers
        my_max_depth = factor_power+1; // the first factor_power ranges will be spawned, and >=1 ranges should be left
        __TBB_ASSERT( my_max_depth < __TBB_RANGE_POOL_CAPACITY, 0 );
    }
    affinity_partition_type(affinity_partition_type& p, split)
        : auto_partition_type_base<affinity_partition_type>(p, split()), my_array(p.my_array) {
        __TBB_ASSERT( p.map_end-p.map_begin<factor || (p.map_end-p.map_begin)%factor==0, NULL );
        map_end = p.map_end;
        map_begin = p.map_end = p.map_mid;
        set_mid(); p.set_mid();
        my_delay = p.my_delay;
    }
    void set_affinity( task &t ) {
        if( map_begin<map_end )
            t.set_affinity( my_array[map_begin] );
    }
    void note_affinity( task::affinity_id id ) {
        if( map_begin<map_end )
            my_array[map_begin] = id;
    }
    bool check_for_demand( task &t ) {
        if( !my_delay ) {
            if( map_mid<map_end ) {
                __TBB_ASSERT(my_max_depth>__TBB_Log2(map_end-map_mid), 0);
                return true; // do not do my_max_depth++ here, but be sure my_max_depth is big enough
            }
            if( flag_task::is_peer_stolen(t) ) {
                my_max_depth++;
                return true;
            }
        } else my_delay = false;
        return false;
    }
    bool divisions_left() { // part of old should_execute_range()
        return my_divisor > 1;
    }
    bool should_create_trap() {
        return true; // TODO: rethink for the stage after memorizing level
    }
    static const unsigned range_pool_size = __TBB_RANGE_POOL_CAPACITY;
};

class auto_partition_type: public auto_partition_type_base<auto_partition_type> {
public:
    auto_partition_type( const auto_partitioner& ) {}
    auto_partition_type( auto_partition_type& src, split)
      : auto_partition_type_base<auto_partition_type>(src, split()) {}
    static const unsigned range_pool_size = __TBB_RANGE_POOL_CAPACITY;
};

class simple_partition_type: public partition_type_base<simple_partition_type> {
public:
    simple_partition_type( const simple_partitioner& ) {}
    simple_partition_type( const simple_partition_type&, split ) {}
    template<typename StartType, typename Range>
    void execute(StartType &start, Range &range) {
        while( range.is_divisible() )
            split_work( start );
        start.run_body( range );
    }
    //static const unsigned range_pool_size = 1; - not necessary because execute() is overridden
};

class old_auto_partition_type: public tbb::internal::partition_type_base {
    size_t num_chunks;
    static const size_t VICTIM_CHUNKS = 4;
public:
    bool should_execute_range(const task &t) {
        if( num_chunks<VICTIM_CHUNKS && t.is_stolen_task() )
            num_chunks = VICTIM_CHUNKS;
        return num_chunks==1;
    }
    old_auto_partition_type( const auto_partitioner& )
      : num_chunks(internal::get_initial_auto_partitioner_divisor()*__TBB_INITIAL_CHUNKS/4) {}
    old_auto_partition_type( const affinity_partitioner& )
      : num_chunks(internal::get_initial_auto_partitioner_divisor()*__TBB_INITIAL_CHUNKS/4) {}
    old_auto_partition_type( old_auto_partition_type& pt, split ) {
        num_chunks = pt.num_chunks = (pt.num_chunks+1u) / 2u;
    }
};

} // namespace interface6::internal
} // namespace interface6


class simple_partitioner {
public:
    simple_partitioner() {}
private:
    template<typename Range, typename Body, typename Partitioner> friend class serial::interface6::start_for;
    template<typename Range, typename Body, typename Partitioner> friend class interface6::internal::start_for;
    template<typename Range, typename Body, typename Partitioner> friend class interface6::internal::start_reduce;
    template<typename Range, typename Body, typename Partitioner> friend class internal::start_scan;
    // backward compatibility
    class partition_type: public internal::partition_type_base {
    public:
        bool should_execute_range(const task& ) {return false;}
        partition_type( const simple_partitioner& ) {}
        partition_type( const partition_type&, split ) {}
    };
    // new implementation just extends existing interface
    typedef interface6::internal::simple_partition_type task_partition_type;
};


class auto_partitioner {
public:
    auto_partitioner() {}

private:
    template<typename Range, typename Body, typename Partitioner> friend class serial::interface6::start_for;
    template<typename Range, typename Body, typename Partitioner> friend class interface6::internal::start_for;
    template<typename Range, typename Body, typename Partitioner> friend class interface6::internal::start_reduce;
    template<typename Range, typename Body, typename Partitioner> friend class internal::start_scan;
    // backward compatibility
    typedef interface6::internal::old_auto_partition_type partition_type;
    // new implementation just extends existing interface
    typedef interface6::internal::auto_partition_type task_partition_type;
};

class affinity_partitioner: internal::affinity_partitioner_base_v3 {
public:
    affinity_partitioner() {}

private:
    template<typename Range, typename Body, typename Partitioner> friend class serial::interface6::start_for;
    template<typename Range, typename Body, typename Partitioner> friend class interface6::internal::start_for;
    template<typename Range, typename Body, typename Partitioner> friend class interface6::internal::start_reduce;
    template<typename Range, typename Body, typename Partitioner> friend class internal::start_scan;
    // backward compatibility - for parallel_scan only
    typedef interface6::internal::old_auto_partition_type partition_type;
    // new implementation just extends existing interface
    typedef interface6::internal::affinity_partition_type task_partition_type;
};

} // namespace tbb

#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
    #pragma warning (pop)
#endif // warning 4244 is back
#undef __TBB_INITIAL_CHUNKS
#undef __TBB_RANGE_POOL_CAPACITY
#undef __TBB_INIT_DEPTH
#endif /* __TBB_partitioner_H */
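
For context, the following is a minimal usage sketch, not part of this header, showing how the three public partitioners above are typically passed to tbb::parallel_for. tbb::parallel_for and tbb::blocked_range come from their own headers, and ScaleBody / scale_all are hypothetical names:

#include "tbb/parallel_for.h"
#include "tbb/blocked_range.h"
#include "tbb/partitioner.h"

struct ScaleBody {                                    // hypothetical body functor
    float* data;
    void operator()( const tbb::blocked_range<size_t>& r ) const {
        for( size_t i = r.begin(); i != r.end(); ++i )
            data[i] *= 2.0f;
    }
};

void scale_all( float* data, size_t n ) {
    ScaleBody body = { data };
    tbb::blocked_range<size_t> range( 0, n );

    // simple_partitioner: recursively splits until the range is no longer divisible
    tbb::parallel_for( range, body, tbb::simple_partitioner() );

    // auto_partitioner: splits adaptively, reacting to work stealing
    tbb::parallel_for( range, body, tbb::auto_partitioner() );

    // affinity_partitioner: must be a named object passed by reference and reused
    // across calls so the recorded affinity map can improve cache reuse
    static tbb::affinity_partitioner ap;
    tbb::parallel_for( range, body, ap );
}

When no partitioner argument is given, parallel_for uses auto_partitioner by default.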
