parallel_reduce.h

00001 /*
00002     Copyright 2005-2012 Intel Corporation.  All Rights Reserved.
00003 
00004     The source code contained or described herein and all documents related
00005     to the source code ("Material") are owned by Intel Corporation or its
00006     suppliers or licensors.  Title to the Material remains with Intel
00007     Corporation or its suppliers and licensors.  The Material is protected
00008     by worldwide copyright laws and treaty provisions.  No part of the
00009     Material may be used, copied, reproduced, modified, published, uploaded,
00010     posted, transmitted, distributed, or disclosed in any way without
00011     Intel's prior express written permission.
00012 
00013     No license under any patent, copyright, trade secret or other
00014     intellectual property right is granted to or conferred upon you by
00015     disclosure or delivery of the Materials, either expressly, by
00016     implication, inducement, estoppel or otherwise.  Any license under such
00017     intellectual property rights must be express and approved by Intel in
00018     writing.
00019 */
00020 
00021 #ifndef __TBB_parallel_reduce_H
00022 #define __TBB_parallel_reduce_H
00023 
00024 #include <new>
00025 #include "task.h"
00026 #include "aligned_space.h"
00027 #include "partitioner.h"
00028 #include "tbb_profiling.h"
00029 
00030 namespace tbb {
00031 
00032 namespace interface6 {
00034 namespace internal {
00035 
00036     using namespace tbb::internal;
00037 
// Position of a reduction task in the task tree. In this file, start_reduce's
// root constructor uses root_task; its splitting constructors label the newly
// created task right_child and re-label the task being split as left_child.
00039     enum {
00040         root_task, left_child, right_child
00041     };
00042 
// Storage type used by start_reduce and finish_reduce to hold one of the
// enumerators above.
00044     typedef char reduction_context;
00045 
00047 
// Continuation task allocated by start_reduce::create_continuation().
// When it executes it (1) joins the Body that a right child may have
// constructed in zombie_space into my_body and destroys it, and (2) if this
// continuation is itself a left child, publishes my_body to its parent
// finish_reduce. All members are private; start_reduce is a friend.
00048     template<typename Body>
00049     class finish_reduce: public flag_task {
              // Set by start_reduce::execute() of the right child after it
              // placement-constructs a split Body in zombie_space.
00051         bool has_right_zombie;
              // Context of the start_reduce task that created this continuation.
00052         const reduction_context my_context;
              // NULL until written with release semantics, either by the left
              // start_reduce child or by a child finish_reduce whose context is
              // left_child.
00053         Body* my_body;
              // Raw storage for one Body; holds a live object only while
              // has_right_zombie is true.
00054         aligned_space<Body,1> zombie_space;
00055         finish_reduce( reduction_context context_ ) :
00056             has_right_zombie(false), // TODO: substitute by flag_task::child_stolen?
00057             my_context(context_),
00058             my_body(NULL)
00059         {
00060         }
              // Merges the right-hand partial result (if any) and forwards the
              // body pointer up the tree. Always returns NULL.
00061         task* execute() {
00062             if( has_right_zombie ) {
00063                 // Right child was stolen.
00064                 Body* s = zombie_space.begin();
                      // NOTE(review): dereferences my_body without a NULL check;
                      // presumably both children have completed (and the left one
                      // has stored my_body) before a continuation runs - confirm.
00065                 my_body->join( *s );
                      // The zombie was created with placement new, so it is
                      // destroyed explicitly rather than deleted.
00066                 s->~Body();
00067             }
00068             if( my_context==left_child )
                      // NOTE(review): assumes the parent of a left_child
                      // continuation is itself a finish_reduce; the cast is unchecked.
00069                 itt_store_word_with_release( static_cast<finish_reduce*>(parent())->my_body, my_body );
00070             return NULL;
00071         }
00072         template<typename Range,typename Body_, typename Partitioner>
00073         friend class start_reduce;
00074     };
00075 
00077 
// Task that reduces a Range into a Body under control of a partitioner.
// Each task holds a pointer to the Body it accumulates into, the sub-range it
// is responsible for, the partitioner's per-task state, and its position
// (my_context) relative to its parent continuation.
00078     template<typename Range, typename Body, typename Partitioner>
00079     class start_reduce: public task {
00080         typedef finish_reduce<Body> finish_type;
              // Body this task accumulates into; a right child may replace it
              // with a split copy in execute().
00081         Body* my_body;
00082         Range my_range;
00083         typename Partitioner::task_partition_type my_partition;
00084         reduction_context my_context; // TODO: factor out into start_reduce_base
00085         /*override*/ task* execute();
00086         template<typename Body_>
00087         friend class finish_reduce;
00088 
00089 public:
              // Root constructor, used by run(): covers the whole range and
              // accumulates directly into *body.
00091         start_reduce( const Range& range, Body* body, Partitioner& partitioner ) :
00092             my_body(body),
00093             my_range(range),
00094             my_partition(partitioner),
00095             my_context(root_task)
00096         {
00097         }
00099 
              // Splitting constructor: takes part of parent_'s range and partition
              // state through their split constructors. The new task is the right
              // child and initially shares parent_'s body; parent_ is re-labelled
              // as the left child.
00100         start_reduce( start_reduce& parent_, split ) :
00101             my_body(parent_.my_body),
00102             my_range(parent_.my_range,split()),
00103             my_partition(parent_.my_partition,split()),
00104             my_context(right_child)
00105         {
00106             my_partition.set_affinity(*this);
00107             parent_.my_context = left_child;
00108         }
00110 
              // Like the splitting constructor, but the new right child is given
              // the explicit range r (parent_'s range is not split here) and its
              // partition state is aligned to depth d.
00111         start_reduce( start_reduce& parent_, const Range& r, depth_t d ) :
00112             my_body(parent_.my_body),
00113             my_range(r),
00114             my_partition(parent_.my_partition,split()),
00115             my_context(right_child)
00116         {
00117             my_partition.set_affinity(*this);
00118             my_partition.align_depth( d );
00119             parent_.my_context = left_child;
00120         }
              // Forwards the affinity notification to the partition state.
00122         /*override*/ void note_affinity( affinity_id id ) {
00123             my_partition.note_affinity( id );
00124         }
              // Entry point without an explicit context: for a non-empty range,
              // allocates a root start_reduce and calls spawn_root_and_wait on it.
              // An empty range is a no-op and leaves body untouched.
00125         static void run( const Range& range, Body& body, Partitioner& partitioner ) {
00126             if( !range.empty() ) {
00127 #if !__TBB_TASK_GROUP_CONTEXT || TBB_JOIN_OUTER_TASK_GROUP
00128                 task::spawn_root_and_wait( *new(task::allocate_root()) start_reduce(range,&body,partitioner) );
00129 #else
00130                 // Bound context prevents exceptions thrown by the body from affecting nesting or sibling algorithms,
00131                 // and allows users to handle exceptions safely by wrapping parallel_reduce in the try-block.
00132                 task_group_context context;
00133                 task::spawn_root_and_wait( *new(task::allocate_root(context)) start_reduce(range,&body,partitioner) );
00134 #endif /* __TBB_TASK_GROUP_CONTEXT && !TBB_JOIN_OUTER_TASK_GROUP */
00135             }
00136         }
00137 #if __TBB_TASK_GROUP_CONTEXT
              // Entry point that allocates the root task in the caller-supplied
              // task_group_context. An empty range is a no-op.
00138         static void run( const Range& range, Body& body, Partitioner& partitioner, task_group_context& context ) {
00139             if( !range.empty() )
00140                 task::spawn_root_and_wait( *new(task::allocate_root(context)) start_reduce(range,&body,partitioner) );
00141         }
00142 #endif /* __TBB_TASK_GROUP_CONTEXT */
              // Allocates the finish_reduce continuation of this task, handing it
              // this task's current context. Not called within this file;
              // presumably used by the partitioner when it splits - confirm in partitioner.h.
00144         finish_type *create_continuation() {
00145             return new( allocate_continuation() ) finish_type(my_context);
00146         }
              // Applies the body to r. Not called within this file; presumably
              // invoked by the partitioner on ranges it does not split further - confirm.
00148         void run_body( Range &r ) { (*my_body)( r ); }
00149     };
// Task body of start_reduce. A right child first decides which Body to
// accumulate into, then the partition state processes my_range, and finally
// a left child publishes its body pointer to the parent continuation.
// Always returns NULL.
00150     template<typename Range, typename Body, typename Partitioner>
00151     task* start_reduce<Range,Body,Partitioner>::execute() {
00152         my_partition.check_being_stolen( *this );
00153         if( my_context==right_child ) {
00154             finish_type* parent_ptr = static_cast<finish_type*>(parent());
                  // A NULL parent body means the left sibling has not yet published
                  // its body (it does so only after finishing its own range, see
                  // below), so this task must not keep sharing that body.
00155             if( !itt_load_word_with_acquire(parent_ptr->my_body) ) { // TODO: replace by is_stolen_task() or by parent_ptr->ref_count() == 2???
                      // Split a fresh Body off the shared one into the parent's
                      // zombie_space; finish_reduce joins and destroys it later.
00156                 my_body = new( parent_ptr->zombie_space.begin() ) Body(*my_body,split());
00157                 parent_ptr->has_right_zombie = true;
00158             }
00159         } else __TBB_ASSERT(my_context==root_task,NULL);// because left leaf spawns right leafs without recycling
              // Splitting and body invocation are delegated to the partition state
              // (defined in partitioner.h, not visible here).
00160         my_partition.execute(*this, my_range);
              // my_context may have become left_child during the call above if
              // this task was split by one of the splitting constructors.
00161         if( my_context==left_child ) {
00162             finish_type* parent_ptr = static_cast<finish_type*>(parent());
                  // A left child must never be accumulating into the zombie slot.
00163             __TBB_ASSERT(my_body!=parent_ptr->zombie_space.begin(),NULL);
                  // Publish the body so the right sibling and finish_reduce can see it.
00164             itt_store_word_with_release(parent_ptr->my_body, my_body );
00165         }
00166         return NULL;
00167     }
00168 
00170 
// Continuation task used by start_deterministic_reduce. Construction always
// splits the given body, so every split of the range gets its own right-hand
// Body; execute() joins that right-hand Body back into the left one.
00171     template<typename Body>
00172     class finish_deterministic_reduce: public task {
              // Body of the task being split; receives the joined result.
00173         Body &my_left_body;
              // Body owned by this continuation and used by the right child.
00174         Body my_right_body;
00175 
              // Binds the left body and creates the right body with Body's
              // splitting constructor.
00176         finish_deterministic_reduce( Body &body ) :
00177             my_left_body( body ),
00178             my_right_body( body, split() )
00179         {
00180         }
              // Joins the right result into the left body. Always returns NULL.
00181         task* execute() {
00182             my_left_body.join( my_right_body );
00183             return NULL;
00184         }
00185         template<typename Range,typename Body_>
00186         friend class start_deterministic_reduce;
00187     };
00188 
00190 
// Task that reduces a Range into a Body without a partitioner: execute()
// keeps splitting while the range is divisible, and every split creates a
// new right-hand Body (see finish_deterministic_reduce).
// Constructors are private; the task is started through the static run().
00191     template<typename Range, typename Body>
00192     class start_deterministic_reduce: public task {
00193         typedef finish_deterministic_reduce<Body> finish_type;
              // Body this task accumulates into (held by reference).
00194         Body &my_body;
00195         Range my_range;
00196         /*override*/ task* execute();
00197 
              // Root constructor, used by run(): covers the whole range and
              // accumulates directly into body.
00199         start_deterministic_reduce( const Range& range, Body& body ) :
00200             my_body( body ),
00201             my_range( range )
00202         {
00203         }
00205 
              // Splitting constructor: takes part of parent_'s range through
              // Range's split constructor and accumulates into the right-hand
              // body stored in the continuation c.
00206         start_deterministic_reduce( start_deterministic_reduce& parent_, finish_type& c ) :
00207             my_body( c.my_right_body ),
00208             my_range( parent_.my_range, split() )
00209         {
00210         }
00211 
00212 public:
              // Entry point without an explicit context: for a non-empty range,
              // allocates a root task and calls spawn_root_and_wait on it.
              // An empty range is a no-op and leaves body untouched.
00213         static void run( const Range& range, Body& body ) {
00214             if( !range.empty() ) {
00215 #if !__TBB_TASK_GROUP_CONTEXT || TBB_JOIN_OUTER_TASK_GROUP
                      // The constructor takes Body&, so body is passed by reference
                      // here, exactly as in the other two call sites.
00216                 task::spawn_root_and_wait( *new(task::allocate_root()) start_deterministic_reduce(range,body) );
00217 #else
00218                 // Bound context prevents exceptions thrown by the body from affecting nesting or sibling algorithms,
00219                 // and allows users to handle exceptions safely by wrapping parallel_deterministic_reduce in the try-block.
00220                 task_group_context context;
00221                 task::spawn_root_and_wait( *new(task::allocate_root(context)) start_deterministic_reduce(range,body) );
00222 #endif /* __TBB_TASK_GROUP_CONTEXT && !TBB_JOIN_OUTER_TASK_GROUP */
00223             }
00224         }
00225 #if __TBB_TASK_GROUP_CONTEXT
              // Entry point that allocates the root task in the caller-supplied
              // task_group_context. An empty range is a no-op.
00226         static void run( const Range& range, Body& body, task_group_context& context ) {
00227             if( !range.empty() )
00228                 task::spawn_root_and_wait( *new(task::allocate_root(context)) start_deterministic_reduce(range,body) );
00229         }
00230 #endif /* __TBB_TASK_GROUP_CONTEXT */
00231     };
00232 
// Task body of start_deterministic_reduce. An indivisible range is processed
// directly; otherwise the task creates a joining continuation, turns itself
// into one child of it and spawns a second child for the split-off range.
00233     template<typename Range, typename Body>
00234     task* start_deterministic_reduce<Range,Body>::execute() {
00235         if( !my_range.is_divisible() ) {
00236             my_body( my_range );
00237             return NULL;
00238         } else {
                  // Constructing the continuation also splits my_body into
                  // c.my_right_body.
00239             finish_type& c = *new( allocate_continuation() ) finish_type( my_body );
00240             recycle_as_child_of(c);
                  // Two children: this recycled task and b below.
00241             c.set_ref_count(2);
                  // b's constructor splits my_range, so this task keeps only the
                  // part of the range that remains after the split.
00242             start_deterministic_reduce& b = *new( c.allocate_child() ) start_deterministic_reduce( *this, c );
00243             task::spawn(b);
                  // Hand the recycled task back to the scheduler so it runs again
                  // on the remaining range.
00244             return this;
00245         }
00246     }
00247 } // namespace internal
00249 } // namespace interface6
00250 
00252 namespace internal {
00253     using interface6::internal::start_reduce;
00254     using interface6::internal::start_deterministic_reduce;
00256 
// Adapter that lets the functional form of parallel_reduce use the Body-based
// machinery. It wraps a range functor (RealBody), a binary reduction functor
// and an identity value into an object that offers the operator(), join() and
// splitting-constructor operations the reduction tasks call on a Body.
// The three inputs are held by const reference, so they must outlive this object.
00260     template<typename Range, typename Value, typename RealBody, typename Reduction>
00261     class lambda_reduce_body {
00262 
00263 //FIXME: decide whether my_real_body, my_reduction and identity_element should be held by value or by reference
00264 //       (might require some performance measurements)
00265 
00266         const Value&     identity_element;
00267         const RealBody&  my_real_body;
00268         const Reduction& my_reduction;
00269         Value            my_value;
              // Declared but not defined: assignment is disabled.
00270         lambda_reduce_body& operator= ( const lambda_reduce_body& );
00271     public:
              // Starts the accumulation from the identity value.
00272         lambda_reduce_body( const Value& identity, const RealBody& real_body, const Reduction& reducer ) :
00273             identity_element(identity),
00274             my_real_body(real_body),
00275             my_reduction(reducer),
00276             my_value(identity)
00277         {}
              // Plain copy: keeps the partial result accumulated so far.
00278         lambda_reduce_body( const lambda_reduce_body& src ) :
00279             identity_element(src.identity_element),
00280             my_real_body(src.my_real_body),
00281             my_reduction(src.my_reduction),
00282             my_value(src.my_value)
00283         {}
              // Splitting copy: shares the functors but restarts from the identity.
00284         lambda_reduce_body( lambda_reduce_body& src, tbb::split ) :
00285             identity_element(src.identity_element),
00286             my_real_body(src.my_real_body),
00287             my_reduction(src.my_reduction),
00288             my_value(src.identity_element)
00289         {}
              // Folds one sub-range into the running value; the functor sees the
              // current value as a const reference.
00290         void operator()(Range& range) {
00291             my_value = my_real_body( range, static_cast<const Value&>(my_value) );
00292         }
              // Combines the partial result of right into this object's value.
00293         void join( lambda_reduce_body& right ) {
00294             my_value = my_reduction( static_cast<const Value&>(my_value), static_cast<const Value&>(right.my_value) );
00295         }
              // Returns a copy of the accumulated value.
00296         Value result() const {
00297             return my_value;
00298         }
00299     };
00300 
00301 } // namespace internal
00303 
00304 // Requirements on Range concept are documented in blocked_range.h
00305 
00324 
00326 
// Parallel reduction of range into body using a default-constructed
// __TBB_DEFAULT_PARTITIONER. The result is accumulated in body; an empty
// range leaves body untouched (see start_reduce::run).
00327 template<typename Range, typename Body>
00328 void parallel_reduce( const Range& range, Body& body ) {
00329     internal::start_reduce<Range,Body, const __TBB_DEFAULT_PARTITIONER>::run( range, body, __TBB_DEFAULT_PARTITIONER() );
00330 }
00331 
00333 
// Parallel reduction of range into body using the given simple_partitioner.
// The result is accumulated in body.
00334 template<typename Range, typename Body>
00335 void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner ) {
00336     internal::start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner );
00337 }
00338 
00340 
// Parallel reduction of range into body using the given auto_partitioner.
// The result is accumulated in body.
00341 template<typename Range, typename Body>
00342 void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner ) {
00343     internal::start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner );
00344 }
00345 
00347 
// Parallel reduction of range into body using the given affinity_partitioner.
// The partitioner is taken by non-const reference, unlike the simple and auto
// partitioner overloads. The result is accumulated in body.
00348 template<typename Range, typename Body>
00349 void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner ) {
00350     internal::start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner );
00351 }
00352 
00353 #if __TBB_TASK_GROUP_CONTEXT
00355 
// Parallel reduction with a simple_partitioner whose root task is allocated
// in the caller-supplied task_group_context. The result is accumulated in body.
00356 template<typename Range, typename Body>
00357 void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner, task_group_context& context ) {
00358     internal::start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner, context );
00359 }
00360 
00362 
// Parallel reduction with an auto_partitioner whose root task is allocated
// in the caller-supplied task_group_context. The result is accumulated in body.
00363 template<typename Range, typename Body>
00364 void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner, task_group_context& context ) {
00365     internal::start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner, context );
00366 }
00367 
00369 
// Parallel reduction with an affinity_partitioner whose root task is allocated
// in the caller-supplied task_group_context. The result is accumulated in body.
00370 template<typename Range, typename Body>
00371 void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner, task_group_context& context ) {
00372     internal::start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner, context );
00373 }
00374 #endif /* __TBB_TASK_GROUP_CONTEXT */
00375 
00379 
00380 
// Functional-form parallel reduction with a default-constructed
// __TBB_DEFAULT_PARTITIONER. real_body folds a sub-range into a running value,
// reduction combines two partial values, and identity seeds every partial
// value. Returns the combined value, which is identity for an empty range.
00381 template<typename Range, typename Value, typename RealBody, typename Reduction>
00382 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) {
00383     typedef internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body_type;
00384     body_type accumulator( identity, real_body, reduction );
00385     internal::start_reduce<Range,body_type,const __TBB_DEFAULT_PARTITIONER>::run( range, accumulator, __TBB_DEFAULT_PARTITIONER() );
00386     return accumulator.result();
00387 }
00388 
00390 
// Functional-form parallel reduction using the given simple_partitioner.
// Returns the combined value, which is identity for an empty range.
00391 template<typename Range, typename Value, typename RealBody, typename Reduction>
00392 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
00393                        const simple_partitioner& partitioner ) {
00394     typedef internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body_type;
00395     body_type accumulator( identity, real_body, reduction );
00396     internal::start_reduce<Range,body_type,const simple_partitioner>::run( range, accumulator, partitioner );
00397     return accumulator.result();
00398 }
00399 
00401 
// Functional-form parallel reduction using the given auto_partitioner.
// Returns the combined value, which is identity for an empty range.
00402 template<typename Range, typename Value, typename RealBody, typename Reduction>
00403 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
00404                        const auto_partitioner& partitioner ) {
00405     typedef internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body_type;
00406     body_type accumulator( identity, real_body, reduction );
00407     internal::start_reduce<Range,body_type,const auto_partitioner>::run( range, accumulator, partitioner );
00408     return accumulator.result();
00409 }
00410 
00412 
// Functional-form parallel reduction using the given affinity_partitioner
// (taken by non-const reference).
// Returns the combined value, which is identity for an empty range.
00413 template<typename Range, typename Value, typename RealBody, typename Reduction>
00414 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
00415                        affinity_partitioner& partitioner ) {
00416     typedef internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body_type;
00417     body_type accumulator( identity, real_body, reduction );
00418     internal::start_reduce<Range,body_type,affinity_partitioner>::run( range, accumulator, partitioner );
00419     return accumulator.result();
00420 }
00421 
00422 #if __TBB_TASK_GROUP_CONTEXT
00424 
// Functional-form parallel reduction using the given simple_partitioner, with
// the root task allocated in the caller-supplied task_group_context.
// Returns the combined value, which is identity for an empty range.
00425 template<typename Range, typename Value, typename RealBody, typename Reduction>
00426 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
00427                        const simple_partitioner& partitioner, task_group_context& context ) {
00428     typedef internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body_type;
00429     body_type accumulator( identity, real_body, reduction );
00430     internal::start_reduce<Range,body_type,const simple_partitioner>::run( range, accumulator, partitioner, context );
00431     return accumulator.result();
00432 }
00433 
00435 
// Functional-form parallel reduction using the given auto_partitioner, with
// the root task allocated in the caller-supplied task_group_context.
// Returns the combined value, which is identity for an empty range.
00436 template<typename Range, typename Value, typename RealBody, typename Reduction>
00437 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
00438                        const auto_partitioner& partitioner, task_group_context& context ) {
00439     typedef internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body_type;
00440     body_type accumulator( identity, real_body, reduction );
00441     internal::start_reduce<Range,body_type,const auto_partitioner>::run( range, accumulator, partitioner, context );
00442     return accumulator.result();
00443 }
00444 
00446 
// Functional-form parallel reduction using the given affinity_partitioner, with
// the root task allocated in the caller-supplied task_group_context.
// Returns the combined value, which is identity for an empty range.
00447 template<typename Range, typename Value, typename RealBody, typename Reduction>
00448 Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
00449                        affinity_partitioner& partitioner, task_group_context& context ) {
00450     typedef internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body_type;
00451     body_type accumulator( identity, real_body, reduction );
00452     internal::start_reduce<Range,body_type,affinity_partitioner>::run( range, accumulator, partitioner, context );
00453     return accumulator.result();
00454 }
00455 #endif /* __TBB_TASK_GROUP_CONTEXT */
00456 
00458 
// Parallel reduction of range into body via start_deterministic_reduce, which
// takes no partitioner and keeps splitting while the range is divisible.
// The result is accumulated in body; an empty range leaves body untouched.
00459 template<typename Range, typename Body>
00460 void parallel_deterministic_reduce( const Range& range, Body& body ) {
00461     internal::start_deterministic_reduce<Range,Body>::run( range, body );
00462 }
00463 
00464 #if __TBB_TASK_GROUP_CONTEXT
00466 
// Same as the two-argument parallel_deterministic_reduce, but the root task
// is allocated in the caller-supplied task_group_context.
00467 template<typename Range, typename Body>
00468 void parallel_deterministic_reduce( const Range& range, Body& body, task_group_context& context ) {
00469     internal::start_deterministic_reduce<Range,Body>::run( range, body, context );
00470 }
00471 #endif /* __TBB_TASK_GROUP_CONTEXT */
00472 
00476 
00477 
// Functional-form deterministic reduction. real_body folds a sub-range into a
// running value, reduction combines two partial values, and identity seeds
// every partial value. Returns the combined value, which is identity for an
// empty range.
00478 template<typename Range, typename Value, typename RealBody, typename Reduction>
00479 Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) {
00480     typedef internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body_type;
00481     body_type accumulator( identity, real_body, reduction );
00482     internal::start_deterministic_reduce<Range,body_type>::run( range, accumulator );
00483     return accumulator.result();
00484 }
00485 
00486 #if __TBB_TASK_GROUP_CONTEXT
00488 
// Functional-form deterministic reduction with the root task allocated in the
// caller-supplied task_group_context.
// Returns the combined value, which is identity for an empty range.
00489 template<typename Range, typename Value, typename RealBody, typename Reduction>
00490 Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
00491                        task_group_context& context ) {
00492     typedef internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body_type;
00493     body_type accumulator( identity, real_body, reduction );
00494     internal::start_deterministic_reduce<Range,body_type>::run( range, accumulator, context );
00495     return accumulator.result();
00496 }
00497 #endif /* __TBB_TASK_GROUP_CONTEXT */
00498 
00499 
00500 } // namespace tbb
00501 
00502 #endif /* __TBB_parallel_reduce_H */
00503 

Copyright © 2005-2012 Intel Corporation. All Rights Reserved.

Intel, Pentium, Intel Xeon, Itanium, Intel XScale and VTune are registered trademarks or trademarks of Intel Corporation or its subsidiaries in the United States and other countries.

* Other names and brands may be claimed as the property of others.